#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to using I-node routines when enough inodes exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
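
/*
   A minimal usage sketch (illustrative, not part of the library proper): create an AIJ matrix
   and, as the MATAIJ manual page above recommends, call both preallocation routines so the
   same code works on one process and in parallel.  The size n and the per-row nonzero counts
   (3 diagonal, 2 off-diagonal) are assumptions for the example only.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,3,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/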
#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
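
/*
   Usage sketch (illustrative only): the caller supplies one entry per global column, the
   routine is collective, and the reduction above makes the result identical on every process.

      PetscReal *norms;
      PetscInt  N;
      ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
      ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
      ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
      ierr = PetscFree(norms);CHKERRQ(ierr);
*/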
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
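
/*
   Usage sketch (illustrative only; grow, gcol, and val are placeholder names): MatSetValues()
   may be called with rows owned by another process, as the stashing branch above shows; such
   entries are communicated and applied during assembly by MatAssemblyBegin_MPIAIJ() and
   MatAssemblyEnd_MPIAIJ() below.

      ierr = MatSetValues(A,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/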
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
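
/*
   Usage sketch (illustrative only; xfix and b are placeholder names): zero two global rows,
   put 1.0 on their diagonal entries, and adjust the right-hand side so the zeroed rows
   enforce x[row] = xfix[row].  The rows listed may be owned by other processes.

      PetscInt rows[] = {0,5};
      ierr = MatZeroRows(A,2,rows,1.0,xfix,b);CHKERRQ(ierr);
*/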
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
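
/*
   The multiply routines above all follow the same communication-overlap idiom, sketched here
   for reference (illustrative only; ctx, x, and xlocal are placeholder names): start the
   scatter, do the local work that needs no remote data, then finish the scatter and do the
   work that consumes the ghost values.

      ierr = VecScatterBegin(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ... local computation that does not need remote values ...
      ierr = VecScatterEnd(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ... computation using the ghost values now in xlocal ...
*/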
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th process needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
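
/*
   Usage sketch (illustrative only; the file name is an assumption): the binary path above is
   what runs when a parallel AIJ matrix is saved with a binary viewer.

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/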
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1443 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1444 if (iascii) { 1445 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1446 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1447 MatInfo info; 1448 PetscBool inodes; 1449 1450 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1451 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1452 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1454 if (!inodes) { 1455 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1456 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1457 } else { 1458 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1459 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1460 } 1461 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1463 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1465 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1468 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1471 PetscInt inodecount,inodelimit,*inodes; 1472 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1473 if (inodes) { 1474 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1475 } else { 1476 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1477 } 1478 PetscFunctionReturn(0); 1479 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1480 PetscFunctionReturn(0); 1481 } 1482 } else if (isbinary) { 1483 if (size == 1) { 1484 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1485 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1486 } else { 1487 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1488 } 1489 PetscFunctionReturn(0); 1490 } else if (isdraw) { 1491 PetscDraw draw; 1492 PetscBool isnull; 1493 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1494 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1495 } 1496 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 /* assemble the entire matrix onto first processor. 
*/
1502 Mat A;
1503 Mat_SeqAIJ *Aloc;
1504 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1505 MatScalar *a;
1506
1507 if (mat->rmap->N > 1024) {
1508 PetscBool flg = PETSC_FALSE;
1509
1510 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1511 if (!flg) {
1512 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1513 }
1514 }
1515
1516 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1517 if (!rank) {
1518 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1519 } else {
1520 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1521 }
1522 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1523 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1524 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1525 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1526 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1527
1528 /* copy over the A part */
1529 Aloc = (Mat_SeqAIJ*)aij->A->data;
1530 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1531 row = mat->rmap->rstart;
1532 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1533 for (i=0; i<m; i++) {
1534 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1535 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1536 }
1537 aj = Aloc->j;
1538 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1539
1540 /* copy over the B part */
1541 Aloc = (Mat_SeqAIJ*)aij->B->data;
1542 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543 row = mat->rmap->rstart;
1544 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1545 ct = cols; /* ct keeps the original pointer, since cols is advanced past each row below */
1546 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1547 for (i=0; i<m; i++) {
1548 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1549 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1550 }
1551 ierr = PetscFree(ct);CHKERRQ(ierr);
1552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1554 /*
1555 Everyone has to call to draw the matrix since the graphics waits are
1556 synchronized across all processors that share the PetscDraw object
1557 */
1558 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1559 if (!rank) {
1560 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1561 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1562 ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1563 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1564 }
1565 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566 ierr = MatDestroy(&A);CHKERRQ(ierr);
1567 }
1568 PetscFunctionReturn(0);
1569 }
1570
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatView_MPIAIJ"
1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1574 {
1575 PetscErrorCode ierr;
1576 PetscBool iascii,isdraw,issocket,isbinary;
1577
1578 PetscFunctionBegin;
1579 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1580 ierr =
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1581 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1582 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1583 if (iascii || isdraw || isbinary || issocket) { 1584 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1585 } else { 1586 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatSOR_MPIAIJ" 1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1594 { 1595 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1596 PetscErrorCode ierr; 1597 Vec bb1 = 0; 1598 PetscBool hasop; 1599 1600 PetscFunctionBegin; 1601 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1602 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1603 } 1604 1605 if (flag == SOR_APPLY_UPPER) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1611 if (flag & SOR_ZERO_INITIAL_GUESS) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 its--; 1614 } 1615 1616 while (its--) { 1617 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 1620 /* update rhs: bb1 = bb - B*x */ 1621 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1622 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1626 } 1627 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1628 if (flag & SOR_ZERO_INITIAL_GUESS) { 1629 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1630 its--; 1631 } 1632 while (its--) { 1633 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 1636 /* update rhs: bb1 = bb - B*x */ 1637 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1638 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1639 1640 /* local sweep */ 1641 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1642 } 1643 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1644 if (flag & SOR_ZERO_INITIAL_GUESS) { 1645 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1646 its--; 1647 } 1648 while (its--) { 1649 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 1652 /* update rhs: bb1 = bb - B*x */ 1653 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1654 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1655 1656 /* local sweep */ 1657 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1658 } 1659 } else if (flag & SOR_EISENSTAT) { 1660 Vec xx1; 1661 1662 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1663 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1664 1665 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 if (!mat->diag) { 1668 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1669 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1670 } 1671 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1672 if (hasop) { 1673 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1674 } else { 1675 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1676 } 1677 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1678 1679 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1680 1681 /* local sweep */ 1682 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1683 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1684 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1685 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1686 1687 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatPermute_MPIAIJ" 1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1694 { 1695 MPI_Comm comm; 1696 PetscInt first,local_rowsize,local_colsize; 1697 const PetscInt *rows; 1698 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1703 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1704 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1709 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1710 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1711 /* get the local target indices */ 1712 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1713 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1714 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1716 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1717 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1718 /* the column permutation is so much easier; 1719 make a local version of 'colp' and invert it */ 1720 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1721 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1722 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1724 /* now we just get the submatrix */ 1725 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1726 /* clean up */ 1727 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1728 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1735 { 1736 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1737 Mat A = mat->A,B = 
mat->B; 1738 PetscErrorCode ierr; 1739 PetscReal isend[5],irecv[5]; 1740 1741 PetscFunctionBegin; 1742 info->block_size = 1.0; 1743 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1744 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1745 isend[3] = info->memory; isend[4] = info->mallocs; 1746 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1747 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1748 isend[3] += info->memory; isend[4] += info->mallocs; 1749 if (flag == MAT_LOCAL) { 1750 info->nz_used = isend[0]; 1751 info->nz_allocated = isend[1]; 1752 info->nz_unneeded = isend[2]; 1753 info->memory = isend[3]; 1754 info->mallocs = isend[4]; 1755 } else if (flag == MAT_GLOBAL_MAX) { 1756 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1757 info->nz_used = irecv[0]; 1758 info->nz_allocated = irecv[1]; 1759 info->nz_unneeded = irecv[2]; 1760 info->memory = irecv[3]; 1761 info->mallocs = irecv[4]; 1762 } else if (flag == MAT_GLOBAL_SUM) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } 1770 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1771 info->fill_ratio_needed = 0; 1772 info->factor_mallocs = 0; 1773 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetOption_MPIAIJ" 1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1780 { 1781 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1782 PetscErrorCode ierr; 1783 1784 PetscFunctionBegin; 1785 switch (op) { 1786 case MAT_NEW_NONZERO_LOCATIONS: 1787 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1788 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1789 case MAT_KEEP_NONZERO_PATTERN: 1790 case MAT_NEW_NONZERO_LOCATION_ERR: 1791 case MAT_USE_INODES: 1792 case MAT_IGNORE_ZERO_ENTRIES: 1793 MatCheckPreallocated(A,1); 1794 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1795 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1796 break; 1797 case MAT_ROW_ORIENTED: 1798 a->roworiented = flg; 1799 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1800 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1801 break; 1802 case MAT_NEW_DIAGONALS: 1803 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1804 break; 1805 case MAT_IGNORE_OFF_PROC_ENTRIES: 1806 a->donotstash = flg; 1807 break; 1808 case MAT_SPD: 1809 A->spd_set = PETSC_TRUE; 1810 A->spd = flg; 1811 if (flg) { 1812 A->symmetric = PETSC_TRUE; 1813 A->structurally_symmetric = PETSC_TRUE; 1814 A->symmetric_set = PETSC_TRUE; 1815 A->structurally_symmetric_set = PETSC_TRUE; 1816 } 1817 break; 1818 case MAT_SYMMETRIC: 1819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1820 break; 1821 case MAT_STRUCTURALLY_SYMMETRIC: 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 break; 1824 case MAT_HERMITIAN: 1825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1826 break; 1827 case MAT_SYMMETRY_ETERNAL: 1828 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1829 break; 1830 default: 1831 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1832 } 1833 PetscFunctionReturn(0); 1834 } 1835 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatGetRow_MPIAIJ" 1838 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1839 { 1840 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1841 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1842 PetscErrorCode ierr; 1843 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1844 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1845 PetscInt *cmap,*idx_p; 1846 1847 PetscFunctionBegin; 1848 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1849 mat->getrowactive = PETSC_TRUE; 1850 1851 if (!mat->rowvalues && (idx || v)) { 1852 /* 1853 allocate enough space to hold information from the longest row. 1854 */ 1855 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1856 PetscInt max = 1,tmp; 1857 for (i=0; i<matin->rmap->n; i++) { 1858 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1859 if (max < tmp) { max = tmp; } 1860 } 1861 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1862 } 1863 1864 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1865 lrow = row - rstart; 1866 1867 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1868 if (!v) {pvA = 0; pvB = 0;} 1869 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1870 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1871 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1872 nztot = nzA + nzB; 1873 1874 cmap = mat->garray; 1875 if (v || idx) { 1876 if (nztot) { 1877 /* Sort by increasing column numbers, assuming A and B already sorted */ 1878 PetscInt imark = -1; 1879 if (v) { 1880 *v = v_p = mat->rowvalues; 1881 for (i=0; i<nzB; i++) { 1882 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1883 else break; 1884 } 1885 imark = i; 1886 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1887 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1888 } 1889 if (idx) { 1890 *idx = idx_p = mat->rowindices; 1891 if (imark > -1) { 1892 for (i=0; i<imark; i++) { 1893 idx_p[i] = cmap[cworkB[i]]; 1894 } 1895 } else { 1896 for (i=0; i<nzB; i++) { 1897 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1898 else break; 1899 } 1900 imark = i; 1901 } 1902 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1903 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1904 } 1905 } else { 1906 if (idx) *idx = 0; 1907 if (v) *v = 0; 1908 } 1909 } 1910 *nz = nztot; 1911 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1912 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1918 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1919 { 1920 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1921 1922 PetscFunctionBegin; 1923 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1924 aij->getrowactive = PETSC_FALSE; 1925 PetscFunctionReturn(0); 1926 } 1927 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "MatNorm_MPIAIJ" 1930 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1931 { 1932 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1933 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1934 PetscErrorCode ierr; 1935 PetscInt i,j,cstart = mat->cmap->rstart; 1936 PetscReal sum = 0.0; 1937 MatScalar *v; 1938 1939 PetscFunctionBegin; 1940 if (aij->size == 1) { 1941 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1942 } 
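/* parallel case: each process accumulates the contribution of its diagonal (A) and off-diagonal (B) blocks and the result is combined with an MPI reduction below */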
else { 1943 if (type == NORM_FROBENIUS) { 1944 v = amat->a; 1945 for (i=0; i<amat->nz; i++) { 1946 #if defined(PETSC_USE_COMPLEX) 1947 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1948 #else 1949 sum += (*v)*(*v); v++; 1950 #endif 1951 } 1952 v = bmat->a; 1953 for (i=0; i<bmat->nz; i++) { 1954 #if defined(PETSC_USE_COMPLEX) 1955 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1956 #else 1957 sum += (*v)*(*v); v++; 1958 #endif 1959 } 1960 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1961 *norm = PetscSqrtReal(*norm); 1962 } else if (type == NORM_1) { /* max column norm */ 1963 PetscReal *tmp,*tmp2; 1964 PetscInt *jj,*garray = aij->garray; 1965 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1966 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1967 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1968 *norm = 0.0; 1969 v = amat->a; jj = amat->j; 1970 for (j=0; j<amat->nz; j++) { 1971 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1972 } 1973 v = bmat->a; jj = bmat->j; 1974 for (j=0; j<bmat->nz; j++) { 1975 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1976 } 1977 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1978 for (j=0; j<mat->cmap->N; j++) { 1979 if (tmp2[j] > *norm) *norm = tmp2[j]; 1980 } 1981 ierr = PetscFree(tmp);CHKERRQ(ierr); 1982 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1983 } else if (type == NORM_INFINITY) { /* max row norm */ 1984 PetscReal ntemp = 0.0; 1985 for (j=0; j<aij->A->rmap->n; j++) { 1986 v = amat->a + amat->i[j]; 1987 sum = 0.0; 1988 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1989 sum += PetscAbsScalar(*v); v++; 1990 } 1991 v = bmat->a + bmat->i[j]; 1992 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1993 sum += PetscAbsScalar(*v); v++; 1994 } 1995 if (sum > ntemp) ntemp = sum; 1996 } 1997 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1998 } else { 1999 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2000 } 2001 } 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "MatTranspose_MPIAIJ" 2007 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2010 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2011 PetscErrorCode ierr; 2012 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2013 PetscInt cstart=A->cmap->rstart,ncol; 2014 Mat B; 2015 MatScalar *array; 2016 2017 PetscFunctionBegin; 2018 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2019 2020 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2021 ai = Aloc->i; aj = Aloc->j; 2022 bi = Bloc->i; bj = Bloc->j; 2023 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2024 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2025 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2026 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2027 for (i=0; i<ai[ma]; i++){ 2028 d_nnz[aj[i]] ++; 2029 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2030 } 2031 2032 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2033 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2034 ierr = 
MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2035 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2036 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2037 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2038 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2039 } else { 2040 B = *matout; 2041 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2042 for (i=0; i<ai[ma]; i++){ 2043 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2044 } 2045 } 2046 2047 /* copy over the A part */ 2048 array = Aloc->a; 2049 row = A->rmap->rstart; 2050 for (i=0; i<ma; i++) { 2051 ncol = ai[i+1]-ai[i]; 2052 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2053 row++; array += ncol; aj += ncol; 2054 } 2055 aj = Aloc->j; 2056 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2057 2058 /* copy over the B part */ 2059 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2060 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2061 array = Bloc->a; 2062 row = A->rmap->rstart; 2063 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2064 cols_tmp = cols; 2065 for (i=0; i<mb; i++) { 2066 ncol = bi[i+1]-bi[i]; 2067 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2068 row++; array += ncol; cols_tmp += ncol; 2069 } 2070 ierr = PetscFree(cols);CHKERRQ(ierr); 2071 2072 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2073 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2074 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2075 *matout = B; 2076 } else { 2077 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2078 } 2079 PetscFunctionReturn(0); 2080 } 2081 2082 #undef __FUNCT__ 2083 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2084 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2085 { 2086 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2087 Mat a = aij->A,b = aij->B; 2088 PetscErrorCode ierr; 2089 PetscInt s1,s2,s3; 2090 2091 PetscFunctionBegin; 2092 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2093 if (rr) { 2094 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2095 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2096 /* Overlap communication with computation. 
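         The scatter of rr into aij->lvec is begun here and completed only after
         the left scaling of B and the scaling of the diagonal block, so the
         communication overlaps that computation.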
*/ 2097 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2098 } 2099 if (ll) { 2100 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2101 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2102 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2103 } 2104 /* scale the diagonal block */ 2105 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2106 2107 if (rr) { 2108 /* Do a scatter end and then right scale the off-diagonal block */ 2109 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2110 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2111 } 2112 2113 PetscFunctionReturn(0); 2114 } 2115 2116 #undef __FUNCT__ 2117 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2119 { 2120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 #undef __FUNCT__ 2129 #define __FUNCT__ "MatEqual_MPIAIJ" 2130 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2131 { 2132 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2133 Mat a,b,c,d; 2134 PetscBool flg; 2135 PetscErrorCode ierr; 2136 2137 PetscFunctionBegin; 2138 a = matA->A; b = matA->B; 2139 c = matB->A; d = matB->B; 2140 2141 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2142 if (flg) { 2143 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2144 } 2145 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2146 PetscFunctionReturn(0); 2147 } 2148 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "MatCopy_MPIAIJ" 2151 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2152 { 2153 PetscErrorCode ierr; 2154 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2155 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2156 2157 PetscFunctionBegin; 2158 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2159 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2160 /* because of the column compression in the off-processor part of the matrix a->B, 2161 the number of columns in a->B and b->B may be different, hence we cannot call 2162 the MatCopy() directly on the two parts. 
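       (Each process stores in a->B only those global columns in which it has
       nonzeros, renumbered compactly through garray, so the B blocks of matrices
       with different nonzero patterns generally have different widths and
       column numberings.)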
If need be, we can provide a more 2163 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2164 then copying the submatrices */ 2165 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2166 } else { 2167 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2168 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2169 } 2170 PetscFunctionReturn(0); 2171 } 2172 2173 #undef __FUNCT__ 2174 #define __FUNCT__ "MatSetUp_MPIAIJ" 2175 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2176 { 2177 PetscErrorCode ierr; 2178 2179 PetscFunctionBegin; 2180 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2181 PetscFunctionReturn(0); 2182 } 2183 2184 #undef __FUNCT__ 2185 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2186 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2187 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2188 { 2189 PetscInt i,m=Y->rmap->N; 2190 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2191 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2192 const PetscInt *xi = x->i,*yi = y->i; 2193 2194 PetscFunctionBegin; 2195 /* Set the number of nonzeros in the new matrix */ 2196 for(i=0; i<m; i++) { 2197 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2198 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2199 nnz[i] = 0; 2200 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2201 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2202 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2203 nnz[i]++; 2204 } 2205 for (; k<nzy; k++) nnz[i]++; 2206 } 2207 PetscFunctionReturn(0); 2208 } 2209 2210 #undef __FUNCT__ 2211 #define __FUNCT__ "MatAXPY_MPIAIJ" 2212 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2213 { 2214 PetscErrorCode ierr; 2215 PetscInt i; 2216 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2217 PetscBLASInt bnz,one=1; 2218 Mat_SeqAIJ *x,*y; 2219 2220 PetscFunctionBegin; 2221 if (str == SAME_NONZERO_PATTERN) { 2222 PetscScalar alpha = a; 2223 x = (Mat_SeqAIJ *)xx->A->data; 2224 y = (Mat_SeqAIJ *)yy->A->data; 2225 bnz = PetscBLASIntCast(x->nz); 2226 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2227 x = (Mat_SeqAIJ *)xx->B->data; 2228 y = (Mat_SeqAIJ *)yy->B->data; 2229 bnz = PetscBLASIntCast(x->nz); 2230 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2231 } else if (str == SUBSET_NONZERO_PATTERN) { 2232 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2233 2234 x = (Mat_SeqAIJ *)xx->B->data; 2235 y = (Mat_SeqAIJ *)yy->B->data; 2236 if (y->xtoy && y->XtoY != xx->B) { 2237 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2238 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2239 } 2240 if (!y->xtoy) { /* get xtoy */ 2241 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2242 y->XtoY = xx->B; 2243 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2244 } 2245 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2246 } else { 2247 Mat B; 2248 PetscInt *nnz_d,*nnz_o; 2249 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2250 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2251 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2252 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2253 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2254 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2255 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2256 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2257 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2258 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2259 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2260 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2261 ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2262 ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2263 }
2264 PetscFunctionReturn(0);
2265 }
2266
2267 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2268
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatConjugate_MPIAIJ"
2271 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2272 {
2273 #if defined(PETSC_USE_COMPLEX)
2274 PetscErrorCode ierr;
2275 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2276
2277 PetscFunctionBegin;
2278 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2279 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2280 #else
2281 PetscFunctionBegin;
2282 #endif
2283 PetscFunctionReturn(0);
2284 }
2285
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatRealPart_MPIAIJ"
2288 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2289 {
2290 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2291 PetscErrorCode ierr;
2292
2293 PetscFunctionBegin;
2294 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2295 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2296 PetscFunctionReturn(0);
2297 }
2298
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2301 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2302 {
2303 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2304 PetscErrorCode ierr;
2305
2306 PetscFunctionBegin;
2307 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2308 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2309 PetscFunctionReturn(0);
2310 }
2311
2312 #ifdef PETSC_HAVE_PBGL
2313
2314 #include <boost/parallel/mpi/bsp_process_group.hpp>
2315 #include <boost/graph/distributed/ilu_default_graph.hpp>
2316 #include <boost/graph/distributed/ilu_0_block.hpp>
2317 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2318 #include <boost/graph/distributed/petsc/interface.hpp>
2319 #include <boost/multi_array.hpp>
2320 #include <boost/parallel/distributed_property_map.hpp>
2321
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2324 /*
2325 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2326 */
2327 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2328 {
2329 namespace petsc = boost::distributed::petsc;
2330
2331 namespace graph_dist = boost::graph::distributed;
2332 using boost::graph::distributed::ilu_default::process_group_type;
2333 using boost::graph::ilu_permuted;
2334
2335 PetscBool row_identity, col_identity;
2336 PetscContainer c;
2337 PetscInt m, n, M, N;
2338 PetscErrorCode ierr;
2339
2340 PetscFunctionBegin;
2341 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2342 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2343 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2344 if (!row_identity || !col_identity) {
2345 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2346 }
2347
2348 process_group_type pg;
2349 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2350 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2351 lgraph_type& level_graph = *lgraph_p;
2352 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2353
2354 petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2355 ilu_permuted(level_graph);
2356
2357 /* put together the new matrix */
2358 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2359 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2360 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2361 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2362 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2363 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2364 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2365 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2366
2367 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2368 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2369 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2370 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2371 PetscFunctionReturn(0);
2372 }
2373
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2376 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2377 {
2378 PetscFunctionBegin;
2379 PetscFunctionReturn(0);
2380 }
2381
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatSolve_MPIAIJ"
2384 /*
2385 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2386 */
2387 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2388 {
2389 namespace graph_dist = boost::graph::distributed;
2390
2391 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2392 lgraph_type* lgraph_p;
2393 PetscContainer c;
2394 PetscErrorCode ierr;
2395
2396 PetscFunctionBegin;
2397 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2398 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2399 ierr = VecCopy(b, x);CHKERRQ(ierr);
2400
2401 PetscScalar* array_x;
2402 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2403 PetscInt sx;
2404 ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2405
2406 PetscScalar* array_b;
2407 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2408 PetscInt sb;
2409 ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2410
2411 lgraph_type& level_graph = *lgraph_p;
2412 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2413
2414 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2415 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2416 ref_x(array_x, boost::extents[num_vertices(graph)]);
2417
2418 typedef boost::iterator_property_map<array_ref_type::iterator,
2419 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2420 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2421 vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2422
2423 ilu_set_solve(*lgraph_p, vector_b, vector_x);
ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
2424
2425 PetscFunctionReturn(0);
2426 }
2427 #endif
2428
2429 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2430 PetscInt nzlocal,nsends,nrecvs;
2431 PetscMPIInt *send_rank,*recv_rank;
2432 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2433 PetscScalar *sbuf_a,**rbuf_a;
2434 PetscErrorCode (*Destroy)(Mat);
2435 } Mat_Redundant;
2436
2437 #undef __FUNCT__
2438 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2439 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2440 {
2441 PetscErrorCode ierr;
2442 Mat_Redundant *redund=(Mat_Redundant*)ptr;
2443 PetscInt i;
2444
2445 PetscFunctionBegin;
2446 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2447 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2448 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2449 for (i=0; i<redund->nrecvs; i++){
2450 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2451 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2452 }
2453 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2454 ierr = PetscFree(redund);CHKERRQ(ierr);
2455 PetscFunctionReturn(0);
2456 }
2457
2458 #undef __FUNCT__
2459 #define __FUNCT__ "MatDestroy_MatRedundant"
2460 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2461 {
2462 PetscErrorCode ierr;
2463 PetscContainer container;
2464 Mat_Redundant *redund=PETSC_NULL;
2465
2466 PetscFunctionBegin;
2467 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2468 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2469 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2470 A->ops->destroy = redund->Destroy;
2471 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2472 if (A->ops->destroy) {
2473 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2474 }
2475 PetscFunctionReturn(0);
2476 }
2477
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2480 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2481 {
2482 PetscMPIInt rank,size;
2483 MPI_Comm comm=((PetscObject)mat)->comm;
2484 PetscErrorCode ierr;
2485 PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2486 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2487 PetscInt *rowrange=mat->rmap->range;
2488 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2489 Mat A=aij->A,B=aij->B,C=*matredundant;
2490 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2491 PetscScalar *sbuf_a;
2492 PetscInt nzlocal=a->nz+b->nz;
2493 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2494 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2495 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2496 MatScalar *aworkA,*aworkB;
2497 PetscScalar *vals;
2498 PetscMPIInt tag1,tag2,tag3,imdex;
2499 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2500 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2501 MPI_Status recv_status,*send_status;
2502 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2503 PetscInt **rbuf_j=PETSC_NULL;
2504 PetscScalar **rbuf_a=PETSC_NULL;
2505 Mat_Redundant *redund=PETSC_NULL;
2506 PetscContainer container;
2507
2508 PetscFunctionBegin;
2509 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2510 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2511
2512 if (reuse == MAT_REUSE_MATRIX) {
2513 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2514 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2515 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2516 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
Wrong local size"); 2517 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2518 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2519 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2520 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2521 2522 nsends = redund->nsends; 2523 nrecvs = redund->nrecvs; 2524 send_rank = redund->send_rank; 2525 recv_rank = redund->recv_rank; 2526 sbuf_nz = redund->sbuf_nz; 2527 rbuf_nz = redund->rbuf_nz; 2528 sbuf_j = redund->sbuf_j; 2529 sbuf_a = redund->sbuf_a; 2530 rbuf_j = redund->rbuf_j; 2531 rbuf_a = redund->rbuf_a; 2532 } 2533 2534 if (reuse == MAT_INITIAL_MATRIX){ 2535 PetscMPIInt subrank,subsize; 2536 PetscInt nleftover,np_subcomm; 2537 /* get the destination processors' id send_rank, nsends and nrecvs */ 2538 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2539 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2540 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2541 np_subcomm = size/nsubcomm; 2542 nleftover = size - nsubcomm*np_subcomm; 2543 nsends = 0; nrecvs = 0; 2544 for (i=0; i<size; i++){ /* i=rank*/ 2545 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2546 send_rank[nsends] = i; nsends++; 2547 recv_rank[nrecvs++] = i; 2548 } 2549 } 2550 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2551 i = size-nleftover-1; 2552 j = 0; 2553 while (j < nsubcomm - nleftover){ 2554 send_rank[nsends++] = i; 2555 i--; j++; 2556 } 2557 } 2558 2559 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2560 for (i=0; i<nleftover; i++){ 2561 recv_rank[nrecvs++] = size-nleftover+i; 2562 } 2563 } 2564 2565 /* allocate sbuf_j, sbuf_a */ 2566 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2567 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2568 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2569 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2570 2571 /* copy mat's local entries into the buffers */ 2572 if (reuse == MAT_INITIAL_MATRIX){ 2573 rownz_max = 0; 2574 rptr = sbuf_j; 2575 cols = sbuf_j + rend-rstart + 1; 2576 vals = sbuf_a; 2577 rptr[0] = 0; 2578 for (i=0; i<rend-rstart; i++){ 2579 row = i + rstart; 2580 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2581 ncols = nzA + nzB; 2582 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2583 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2584 /* load the column indices for this row into cols */ 2585 lwrite = 0; 2586 for (l=0; l<nzB; l++) { 2587 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2588 vals[lwrite] = aworkB[l]; 2589 cols[lwrite++] = ctmp; 2590 } 2591 } 2592 for (l=0; l<nzA; l++){ 2593 vals[lwrite] = aworkA[l]; 2594 cols[lwrite++] = cstart + cworkA[l]; 2595 } 2596 for (l=0; l<nzB; l++) { 2597 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2598 vals[lwrite] = aworkB[l]; 2599 cols[lwrite++] = ctmp; 2600 } 2601 } 2602 vals += ncols; 2603 cols += ncols; 2604 rptr[i+1] = rptr[i] + ncols; 2605 if (rownz_max < ncols) rownz_max = ncols; 2606 } 2607 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2608 } else { /* only copy matrix values into sbuf_a */ 2609 rptr = sbuf_j; 2610 vals = sbuf_a; 2611 rptr[0] = 0; 2612 for (i=0; i<rend-rstart; i++){ 2613 row = i + 
rstart; 2614 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2615 ncols = nzA + nzB; 2616 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2617 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2618 lwrite = 0; 2619 for (l=0; l<nzB; l++) { 2620 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2621 } 2622 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2623 for (l=0; l<nzB; l++) { 2624 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2625 } 2626 vals += ncols; 2627 rptr[i+1] = rptr[i] + ncols; 2628 } 2629 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2630 2631 /* send nzlocal to others, and recv other's nzlocal */ 2632 /*--------------------------------------------------*/ 2633 if (reuse == MAT_INITIAL_MATRIX){ 2634 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2635 s_waits2 = s_waits3 + nsends; 2636 s_waits1 = s_waits2 + nsends; 2637 r_waits1 = s_waits1 + nsends; 2638 r_waits2 = r_waits1 + nrecvs; 2639 r_waits3 = r_waits2 + nrecvs; 2640 } else { 2641 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2642 r_waits3 = s_waits3 + nsends; 2643 } 2644 2645 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2646 if (reuse == MAT_INITIAL_MATRIX){ 2647 /* get new tags to keep the communication clean */ 2648 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2649 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2650 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2651 2652 /* post receives of other's nzlocal */ 2653 for (i=0; i<nrecvs; i++){ 2654 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2655 } 2656 /* send nzlocal to others */ 2657 for (i=0; i<nsends; i++){ 2658 sbuf_nz[i] = nzlocal; 2659 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2660 } 2661 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2662 count = nrecvs; 2663 while (count) { 2664 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2665 recv_rank[imdex] = recv_status.MPI_SOURCE; 2666 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2667 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2668 2669 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2670 rbuf_nz[imdex] += i + 2; 2671 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2672 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2673 count--; 2674 } 2675 /* wait on sends of nzlocal */ 2676 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2677 /* send mat->i,j to others, and recv from other's */ 2678 /*------------------------------------------------*/ 2679 for (i=0; i<nsends; i++){ 2680 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2681 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2682 } 2683 /* wait on receives of mat->i,j */ 2684 /*------------------------------*/ 2685 count = nrecvs; 2686 while (count) { 2687 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2688 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2689 count--; 2690 } 2691 /* wait on sends of mat->i,j */ 2692 /*---------------------------*/ 2693 if (nsends) { 2694 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2695 } 2696 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2697 2698 /* post receives, send and receive mat->a */ 2699 /*----------------------------------------*/ 2700 for (imdex=0; imdex<nrecvs; imdex++) { 2701 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2702 } 2703 for (i=0; i<nsends; i++){ 2704 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2705 } 2706 count = nrecvs; 2707 while (count) { 2708 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2709 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2710 count--; 2711 } 2712 if (nsends) { 2713 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2714 } 2715 2716 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2717 2718 /* create redundant matrix */ 2719 /*-------------------------*/ 2720 if (reuse == MAT_INITIAL_MATRIX){ 2721 /* compute rownz_max for preallocation */ 2722 for (imdex=0; imdex<nrecvs; imdex++){ 2723 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2724 rptr = rbuf_j[imdex]; 2725 for (i=0; i<j; i++){ 2726 ncols = rptr[i+1] - rptr[i]; 2727 if (rownz_max < ncols) rownz_max = ncols; 2728 } 2729 } 2730 2731 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2732 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2733 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2734 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2735 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2736 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2737 } else { 2738 C = *matredundant; 2739 } 2740 2741 /* insert local matrix entries */ 2742 rptr = sbuf_j; 2743 cols = sbuf_j + rend-rstart + 1; 2744 vals = sbuf_a; 2745 for (i=0; i<rend-rstart; i++){ 2746 row = i + rstart; 2747 ncols = rptr[i+1] - rptr[i]; 2748 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2749 vals += ncols; 2750 cols += ncols; 2751 } 2752 /* insert received matrix entries */ 2753 for (imdex=0; imdex<nrecvs; imdex++){ 2754 rstart = rowrange[recv_rank[imdex]]; 2755 rend = rowrange[recv_rank[imdex]+1]; 2756 rptr = rbuf_j[imdex]; 2757 cols = rbuf_j[imdex] + rend-rstart + 1; 2758 vals = rbuf_a[imdex]; 2759 for (i=0; i<rend-rstart; i++){ 2760 row = i + rstart; 2761 ncols = rptr[i+1] - rptr[i]; 2762 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2763 vals += ncols; 2764 cols += ncols; 2765 } 2766 } 2767 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2768 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2769 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2770 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2771 if (reuse == MAT_INITIAL_MATRIX) { 2772 PetscContainer container; 2773 *matredundant = C; 2774 /* create a supporting struct and attach it to C for reuse */ 2775 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2776 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
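/* the container keeps the communication buffers alive beyond this call: a subsequent MAT_REUSE_MATRIX call recovers them via PetscObjectQuery(), and MatDestroy_MatRedundant() frees them before dispatching to the original destroy routine */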
2777 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2778 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2779 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2780 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2781
2782 redund->nzlocal = nzlocal;
2783 redund->nsends = nsends;
2784 redund->nrecvs = nrecvs;
2785 redund->send_rank = send_rank;
2786 redund->recv_rank = recv_rank;
2787 redund->sbuf_nz = sbuf_nz;
2788 redund->rbuf_nz = rbuf_nz;
2789 redund->sbuf_j = sbuf_j;
2790 redund->sbuf_a = sbuf_a;
2791 redund->rbuf_j = rbuf_j;
2792 redund->rbuf_a = rbuf_a;
2793
2794 redund->Destroy = C->ops->destroy;
2795 C->ops->destroy = MatDestroy_MatRedundant;
2796 }
2797 PetscFunctionReturn(0);
2798 }
2799
2800 #undef __FUNCT__
2801 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2802 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2803 {
2804 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2805 PetscErrorCode ierr;
2806 PetscInt i,*idxb = 0;
2807 PetscScalar *va,*vb;
2808 Vec vtmp;
2809
2810 PetscFunctionBegin;
2811 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2812 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2813 if (idx) {
2814 for (i=0; i<A->rmap->n; i++) {
2815 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2816 }
2817 }
2818
2819 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2820 if (idx) {
2821 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2822 }
2823 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2824 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2825
2826 for (i=0; i<A->rmap->n; i++){
2827 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2828 va[i] = vb[i];
2829 if (idx) idx[i] = a->garray[idxb[i]];
2830 }
2831 }
2832
2833 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2834 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2835 ierr = PetscFree(idxb);CHKERRQ(ierr);
2836 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2837 PetscFunctionReturn(0);
2838 }
2839
2840 #undef __FUNCT__
2841 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2842 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2843 {
2844 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2845 PetscErrorCode ierr;
2846 PetscInt i,*idxb = 0;
2847 PetscScalar *va,*vb;
2848 Vec vtmp;
2849
2850 PetscFunctionBegin;
2851 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2852 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2853 if (idx) {
2854 for (i=0; i<A->rmap->n; i++) {
2855 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2856 }
2857 }
2858
2859 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2860 if (idx) {
2861 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2862 }
2863 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2864 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2865
2866 for (i=0; i<A->rmap->n; i++){
2867 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2868 va[i] = vb[i];
2869 if (idx) idx[i] = a->garray[idxb[i]];
2870 }
2871 }
2872
2873 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2874 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2875 ierr = PetscFree(idxb);CHKERRQ(ierr);
2876 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2877 PetscFunctionReturn(0);
2878 }
2879
2880 #undef __FUNCT__
2881 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2882 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2883 {
2884 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2885 PetscInt n = A->rmap->n;
2886 PetscInt cstart = A->cmap->rstart;
2887 PetscInt *cmap = mat->garray;
2888 PetscInt *diagIdx, *offdiagIdx;
2889 Vec diagV, offdiagV;
2890 PetscScalar *a, *diagA, *offdiagA;
2891 PetscInt r;
2892 PetscErrorCode ierr;
2893
2894 PetscFunctionBegin;
2895 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2896 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2897 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2898 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2899 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2900 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2901 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2902 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2903 for (r = 0; r < n; ++r) {
2904 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2905 a[r] = diagA[r];
2906 idx[r] = cstart + diagIdx[r];
2907 } else {
2908 a[r] = offdiagA[r];
2909 idx[r] = cmap[offdiagIdx[r]];
2910 }
2911 }
2912 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2913 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2914 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2915 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2916 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2917 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2918 PetscFunctionReturn(0);
2919 }
2920
2921 #undef __FUNCT__
2922 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2923 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2924 {
2925 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2926 PetscInt n = A->rmap->n;
2927 PetscInt cstart = A->cmap->rstart;
2928 PetscInt *cmap = mat->garray;
2929 PetscInt *diagIdx, *offdiagIdx;
2930 Vec diagV, offdiagV;
2931 PetscScalar *a, *diagA, *offdiagA;
2932 PetscInt r;
2933 PetscErrorCode ierr;
2934
2935 PetscFunctionBegin;
2936 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2937 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2938 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2939 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2940 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2941 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2942 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2943 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2944 for (r = 0; r < n; ++r) {
2945 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2946 a[r] = diagA[r];
2947 idx[r] = cstart + diagIdx[r];
2948 } else {
2949 a[r] = offdiagA[r];
2950 idx[r] = cmap[offdiagIdx[r]];
2951 }
2952 }
2953 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2954 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2955 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2956 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2957 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2958 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2959 PetscFunctionReturn(0);
2960 }
2961
2962 #undef __FUNCT__
2963 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2964 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2965 {
2966 PetscErrorCode ierr;
2967 Mat *dummy;
2968
2969 PetscFunctionBegin;
2970 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2971 *newmat = *dummy;
2972 ierr = PetscFree(dummy);CHKERRQ(ierr);
2973 PetscFunctionReturn(0);
2974 }
2975
2976 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2977
2978 #undef __FUNCT__
2979 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2980 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2981 { 2982 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 2991 /* -------------------------------------------------------------------*/ 2992 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2993 MatGetRow_MPIAIJ, 2994 MatRestoreRow_MPIAIJ, 2995 MatMult_MPIAIJ, 2996 /* 4*/ MatMultAdd_MPIAIJ, 2997 MatMultTranspose_MPIAIJ, 2998 MatMultTransposeAdd_MPIAIJ, 2999 #ifdef PETSC_HAVE_PBGL 3000 MatSolve_MPIAIJ, 3001 #else 3002 0, 3003 #endif 3004 0, 3005 0, 3006 /*10*/ 0, 3007 0, 3008 0, 3009 MatSOR_MPIAIJ, 3010 MatTranspose_MPIAIJ, 3011 /*15*/ MatGetInfo_MPIAIJ, 3012 MatEqual_MPIAIJ, 3013 MatGetDiagonal_MPIAIJ, 3014 MatDiagonalScale_MPIAIJ, 3015 MatNorm_MPIAIJ, 3016 /*20*/ MatAssemblyBegin_MPIAIJ, 3017 MatAssemblyEnd_MPIAIJ, 3018 MatSetOption_MPIAIJ, 3019 MatZeroEntries_MPIAIJ, 3020 /*24*/ MatZeroRows_MPIAIJ, 3021 0, 3022 #ifdef PETSC_HAVE_PBGL 3023 0, 3024 #else 3025 0, 3026 #endif 3027 0, 3028 0, 3029 /*29*/ MatSetUp_MPIAIJ, 3030 #ifdef PETSC_HAVE_PBGL 3031 0, 3032 #else 3033 0, 3034 #endif 3035 0, 3036 0, 3037 0, 3038 /*34*/ MatDuplicate_MPIAIJ, 3039 0, 3040 0, 3041 0, 3042 0, 3043 /*39*/ MatAXPY_MPIAIJ, 3044 MatGetSubMatrices_MPIAIJ, 3045 MatIncreaseOverlap_MPIAIJ, 3046 MatGetValues_MPIAIJ, 3047 MatCopy_MPIAIJ, 3048 /*44*/ MatGetRowMax_MPIAIJ, 3049 MatScale_MPIAIJ, 3050 0, 3051 0, 3052 MatZeroRowsColumns_MPIAIJ, 3053 /*49*/ 0, 3054 0, 3055 0, 3056 0, 3057 0, 3058 /*54*/ MatFDColoringCreate_MPIAIJ, 3059 0, 3060 MatSetUnfactored_MPIAIJ, 3061 0, /* MatPermute_MPIAIJ, impl currently broken */ 3062 0, 3063 /*59*/ MatGetSubMatrix_MPIAIJ, 3064 MatDestroy_MPIAIJ, 3065 MatView_MPIAIJ, 3066 0, 3067 0, 3068 /*64*/ 0, 3069 0, 3070 0, 3071 0, 3072 0, 3073 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3074 MatGetRowMinAbs_MPIAIJ, 3075 0, 3076 MatSetColoring_MPIAIJ, 3077 #if defined(PETSC_HAVE_ADIC) 3078 MatSetValuesAdic_MPIAIJ, 3079 #else 3080 0, 3081 #endif 3082 MatSetValuesAdifor_MPIAIJ, 3083 /*75*/ MatFDColoringApply_AIJ, 3084 0, 3085 0, 3086 0, 3087 0, 3088 /*80*/ 0, 3089 0, 3090 0, 3091 /*83*/ MatLoad_MPIAIJ, 3092 0, 3093 0, 3094 0, 3095 0, 3096 0, 3097 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3098 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3099 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3100 MatPtAP_Basic, 3101 MatPtAPSymbolic_MPIAIJ, 3102 /*94*/ MatPtAPNumeric_MPIAIJ, 3103 0, 3104 0, 3105 0, 3106 0, 3107 /*99*/ 0, 3108 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3109 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3110 MatConjugate_MPIAIJ, 3111 0, 3112 /*104*/MatSetValuesRow_MPIAIJ, 3113 MatRealPart_MPIAIJ, 3114 MatImaginaryPart_MPIAIJ, 3115 0, 3116 0, 3117 /*109*/0, 3118 MatGetRedundantMatrix_MPIAIJ, 3119 MatGetRowMin_MPIAIJ, 3120 0, 3121 0, 3122 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3123 0, 3124 0, 3125 0, 3126 0, 3127 /*119*/0, 3128 0, 3129 0, 3130 0, 3131 MatGetMultiProcBlock_MPIAIJ, 3132 /*124*/MatFindNonZeroRows_MPIAIJ, 3133 MatGetColumnNorms_MPIAIJ, 3134 MatInvertBlockDiagonal_MPIAIJ, 3135 0, 3136 MatGetSubMatricesParallel_MPIAIJ, 3137 /*129*/0, 3138 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3139 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3140 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3141 0, 3142 /*134*/0, 3143 0, 3144 0, 3145 0, 3146 0 3147 }; 3148 3149 /* ----------------------------------------------------------------------------------------*/ 3150 3151 
EXTERN_C_BEGIN 3152 #undef __FUNCT__ 3153 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3154 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3155 { 3156 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3157 PetscErrorCode ierr; 3158 3159 PetscFunctionBegin; 3160 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3161 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3162 PetscFunctionReturn(0); 3163 } 3164 EXTERN_C_END 3165 3166 EXTERN_C_BEGIN 3167 #undef __FUNCT__ 3168 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3169 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3170 { 3171 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3172 PetscErrorCode ierr; 3173 3174 PetscFunctionBegin; 3175 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3176 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3177 PetscFunctionReturn(0); 3178 } 3179 EXTERN_C_END 3180 3181 EXTERN_C_BEGIN 3182 #undef __FUNCT__ 3183 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3184 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3185 { 3186 Mat_MPIAIJ *b; 3187 PetscErrorCode ierr; 3188 PetscInt i; 3189 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3190 3191 PetscFunctionBegin; 3192 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3193 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3194 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3195 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3196 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3197 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3198 3199 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3200 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3201 if (d_nnz) { 3202 for (i=0; i<B->rmap->n; i++) { 3203 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3204 } 3205 } 3206 if (o_nnz) { 3207 for (i=0; i<B->rmap->n; i++) { 3208 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3209 } 3210 } 3211 b = (Mat_MPIAIJ*)B->data; 3212 3213 if (!B->preallocated) { 3214 /* Explicitly create 2 MATSEQAIJ matrices. */ 3215 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3216 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3217 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3218 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3219 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3220 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3221 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3222 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3223 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3224 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3225 } 3226 3227 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3228 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3229 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3230 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3231 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3232 B->preallocated = PETSC_TRUE; 3233 PetscFunctionReturn(0); 3234 } 3235 EXTERN_C_END 3236 3237 #undef __FUNCT__ 3238 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3239 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3240 { 3241 Mat mat; 3242 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3243 PetscErrorCode ierr; 3244 3245 PetscFunctionBegin; 3246 *newmat = 0; 3247 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3248 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3249 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3250 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3251 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3252 a = (Mat_MPIAIJ*)mat->data; 3253 3254 mat->factortype = matin->factortype; 3255 mat->rmap->bs = matin->rmap->bs; 3256 mat->cmap->bs = matin->cmap->bs; 3257 mat->assembled = PETSC_TRUE; 3258 mat->insertmode = NOT_SET_VALUES; 3259 mat->preallocated = PETSC_TRUE; 3260 3261 a->size = oldmat->size; 3262 a->rank = oldmat->rank; 3263 a->donotstash = oldmat->donotstash; 3264 a->roworiented = oldmat->roworiented; 3265 a->rowindices = 0; 3266 a->rowvalues = 0; 3267 a->getrowactive = PETSC_FALSE; 3268 3269 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3270 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3271 3272 if (oldmat->colmap) { 3273 #if defined (PETSC_USE_CTABLE) 3274 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3275 #else 3276 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3277 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3278 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3279 #endif 3280 } else a->colmap = 0; 3281 if (oldmat->garray) { 3282 PetscInt len; 3283 len = oldmat->B->cmap->n; 3284 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3285 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3286 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3287 } else a->garray = 0; 3288 3289 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3290 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3291 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3292 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3293 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3294 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3295 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3296 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3297 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3298 *newmat = mat; 3299 PetscFunctionReturn(0); 3300 } 3301 3302 3303 3304 #undef __FUNCT__ 3305 #define __FUNCT__ "MatLoad_MPIAIJ" 3306 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3307 { 3308 PetscScalar *vals,*svals; 3309 MPI_Comm comm = ((PetscObject)viewer)->comm; 3310 PetscErrorCode ierr; 3311 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3312 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3313 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols; 3314 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3315 PetscInt cend,cstart,n,*rowners,sizesset=1; 3316 int fd; 3317 3318 PetscFunctionBegin; 3319 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3320 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3321 if (!rank) { 3322 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3323 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3324 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3325 } 3326 3327 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3328 3329 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3330 M = header[1]; N = header[2]; 3331 /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */ 3332 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3333 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3334 3335 /* If global sizes are set, check that they are consistent with those given in the file */ 3336 if (sizesset) { 3337 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3338 } 3339 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows); 3340 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3341 3342 /* determine ownership of all rows */ 3343 if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3344 else m = newMat->rmap->n; /* Set by user */ 3345 3346 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3347 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3348 3349 /* First process needs enough room for the process with the most rows */ 3350 if (!rank) { 3351 mmax = rowners[1]; 3352 for (i=2; i<=size; i++) { /* include rowners[size], the row count of the last process */ 3353 mmax = PetscMax(mmax,rowners[i]); 3354 } 3355 } else mmax = m; 3356 3357 rowners[0] = 0; 3358 for (i=2; i<=size; i++) { 3359 rowners[i] += rowners[i-1]; 3360 } 3361 rstart = rowners[rank]; 3362 rend = rowners[rank+1]; 3363 3364 /* distribute row lengths to all processors */ 3365 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3366 if (!rank) { 3367 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3368 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); /* must hold the longest remote block, not just the local m rows */ 3369 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3370 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3371 for (j=0; j<m; j++) { 3372 procsnz[0] += ourlens[j]; 3373 } 3374 for (i=1; i<size; i++) { 3375 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3376 /* calculate the number of nonzeros on each processor */ 3377 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3378 procsnz[i] += rowlengths[j]; 3379 } 3380 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3381 } 3382 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3383 } else { 3384 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3385 } 3386 3387 if (!rank) { 3388 /* determine max buffer needed and allocate it */ 3389 maxnz = 0; 3390 for (i=0; i<size; i++) { 3391 maxnz = PetscMax(maxnz,procsnz[i]); 3392 } 3393 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3394 3395 /* read in my part of the matrix column indices */ 3396 nz = procsnz[0]; 3397 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3398 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3399 3400 /* read in everyone else's part and ship it off */ 3401 for (i=1; i<size; i++) { 3402 nz = procsnz[i]; 3403 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3404 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3405 } 3406 ierr = PetscFree(cols);CHKERRQ(ierr); 3407 } else { 3408 /* determine buffer space needed for message */ 3409 nz = 0; 3410 for (i=0; i<m; i++) { 3411 nz += ourlens[i]; 3412 } 3413 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3414 3415 /* receive message of column indices */ 3416 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3417 } 3418 3419 /* determine column ownership if matrix is not square */ 3420 if (N != M) { 3421 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3422 else n = newMat->cmap->n; 3423 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3424 cstart = cend - n; 3425 } else { 3426 cstart = rstart; 3427 cend = rend; 3428 n = cend - cstart; 3429 } 3430 3431 /* loop over local rows, determining number of off-diagonal entries */ 3432 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3433 jj = 0; 3434 for (i=0; i<m; i++) { 3435 for (j=0; j<ourlens[i]; j++) { 3436 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3437 jj++; 3438 } 3439 } 3440 3441 for (i=0; i<m; i++) { 3442 ourlens[i] -= offlens[i]; 3443 } 3444 if (!sizesset) { 3445 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3446 } 3447 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3448 3449 for (i=0; i<m; i++) { 3450 ourlens[i] += offlens[i]; 3451 } 3452 3453 if (!rank) { 3454 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3455 3456 /* read in my part of the matrix numerical values */ 3457 nz = procsnz[0]; 3458 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3459 3460 /* insert into matrix */ 3461 jj = rstart; 3462 smycols = mycols; 3463 svals = vals; 3464 for (i=0; i<m; i++) { 3465 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3466 smycols += ourlens[i]; 3467 svals += ourlens[i]; 3468 jj++; 3469 } 3470 3471 /* read in the other processors' parts and ship them out */ 3472 for (i=1; i<size; i++) { 3473 nz = procsnz[i]; 3474 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3475 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3476 } 3477 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3478 } else { 3479 /* receive numeric values */ 3480 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3481 3482 /* receive message of values */ 3483 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3484 3485 /* insert into matrix */ 3486 jj = rstart; 3487 smycols = mycols; 3488 svals = vals; 3489 for (i=0; i<m; i++) { 3490 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3491 smycols += ourlens[i]; 3492 svals += ourlens[i]; 3493 jj++; 3494 } 3495 } 3496 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3497 ierr = PetscFree(vals);CHKERRQ(ierr); 3498 ierr = PetscFree(mycols);CHKERRQ(ierr); 3499 ierr = PetscFree(rowners);CHKERRQ(ierr); 3500 3501 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3502 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3503 PetscFunctionReturn(0); 3504 } 3505 3506 #undef __FUNCT__ 3507 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3508 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3509 { 3510 PetscErrorCode ierr; 3511 IS iscol_local; 3512 PetscInt csize; 3513 3514 PetscFunctionBegin; 3515 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3516 if (call == MAT_REUSE_MATRIX) { 3517 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3518 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3519 } else { 3520 PetscInt cbs; 3521 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3522 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3523 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3524 } 3525 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3526 if (call == MAT_INITIAL_MATRIX) { 3527 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3528 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3529 } 3530 PetscFunctionReturn(0); 3531 } 3532 3533 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3534 #undef __FUNCT__ 3535 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3536 /* 3537 Not great since it makes two copies of the submatrix: first a SeqAIJ on each process, 3538 and then the final result obtained by concatenating the local matrices. 3539 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3540 3541 Note: This requires a sequential iscol with all indices.
3542 */ 3543 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3544 { 3545 PetscErrorCode ierr; 3546 PetscMPIInt rank,size; 3547 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3548 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3549 PetscBool allcolumns, colflag; 3550 Mat M,Mreuse; 3551 MatScalar *vwork,*aa; 3552 MPI_Comm comm = ((PetscObject)mat)->comm; 3553 Mat_SeqAIJ *aij; 3554 3555 3556 PetscFunctionBegin; 3557 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3558 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3559 3560 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3561 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3562 if (colflag && ncol == mat->cmap->N){ 3563 allcolumns = PETSC_TRUE; 3564 } else { 3565 allcolumns = PETSC_FALSE; 3566 } 3567 if (call == MAT_REUSE_MATRIX) { 3568 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3569 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3570 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3571 } else { 3572 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3573 } 3574 3575 /* 3576 m - number of local rows 3577 n - number of columns (same on all processors) 3578 rstart - first row in new global matrix generated 3579 */ 3580 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3581 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3582 if (call == MAT_INITIAL_MATRIX) { 3583 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3584 ii = aij->i; 3585 jj = aij->j; 3586 3587 /* 3588 Determine the number of non-zeros in the diagonal and off-diagonal 3589 portions of the matrix in order to do correct preallocation 3590 */ 3591 3592 /* first get start and end of "diagonal" columns */ 3593 if (csize == PETSC_DECIDE) { 3594 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3595 if (mglobal == n) { /* square matrix */ 3596 nlocal = m; 3597 } else { 3598 nlocal = n/size + ((n % size) > rank); 3599 } 3600 } else { 3601 nlocal = csize; 3602 } 3603 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3604 rstart = rend - nlocal; 3605 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3606 3607 /* next, compute all the lengths */ 3608 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3609 olens = dlens + m; 3610 for (i=0; i<m; i++) { 3611 jend = ii[i+1] - ii[i]; 3612 olen = 0; 3613 dlen = 0; 3614 for (j=0; j<jend; j++) { 3615 if (*jj < rstart || *jj >= rend) olen++; 3616 else dlen++; 3617 jj++; 3618 } 3619 olens[i] = olen; 3620 dlens[i] = dlen; 3621 } 3622 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3623 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3624 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3625 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3626 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3627 ierr = PetscFree(dlens);CHKERRQ(ierr); 3628 } else { 3629 PetscInt ml,nl; 3630 3631 M = *newmat; 3632 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3633 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3634 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3635 /* 3636 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly, 3637 rather than the slower MatSetValues(). 3638 */ 3639 M->was_assembled = PETSC_TRUE; 3640 M->assembled = PETSC_FALSE; 3641 } 3642 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3643 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3644 ii = aij->i; 3645 jj = aij->j; 3646 aa = aij->a; 3647 for (i=0; i<m; i++) { 3648 row = rstart + i; 3649 nz = ii[i+1] - ii[i]; 3650 cwork = jj; jj += nz; 3651 vwork = aa; aa += nz; 3652 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3653 } 3654 3655 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3656 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3657 *newmat = M; 3658 3659 /* save submatrix used in processor for next request */ 3660 if (call == MAT_INITIAL_MATRIX) { 3661 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3662 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3663 } 3664 3665 PetscFunctionReturn(0); 3666 } 3667 3668 EXTERN_C_BEGIN 3669 #undef __FUNCT__ 3670 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3671 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3672 { 3673 PetscInt m,cstart, cend,j,nnz,i,d; 3674 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3675 const PetscInt *JJ; 3676 PetscScalar *values; 3677 PetscErrorCode ierr; 3678 3679 PetscFunctionBegin; 3680 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3681 3682 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3683 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3684 m = B->rmap->n; 3685 cstart = B->cmap->rstart; 3686 cend = B->cmap->rend; 3687 rstart = B->rmap->rstart; 3688 3689 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3690 3691 #if defined(PETSC_USE_DEBUG) 3692 for (i=0; i<m; i++) { 3693 nnz = Ii[i+1]- Ii[i]; 3694 JJ = J + Ii[i]; 3695 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3696 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3697 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3698 } 3699 #endif 3700 3701 for (i=0; i<m; i++) { 3702 nnz = Ii[i+1]- Ii[i]; 3703 JJ = J + Ii[i]; 3704 nnz_max = PetscMax(nnz_max,nnz); 3705 d = 0; 3706 for (j=0; j<nnz; j++) { 3707 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3708 } 3709 d_nnz[i] = d; 3710 o_nnz[i] = nnz - d; 3711 } 3712 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3713 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3714 3715 if (v) values = (PetscScalar*)v; 3716 else { 3717 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3718 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3719 } 3720 3721 for (i=0; i<m; i++) { 3722 ii = i + rstart; 3723 nnz = Ii[i+1]- Ii[i]; 3724 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3725 } 3726 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3727 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3728 3729 if (!v) { 3730 ierr = PetscFree(values);CHKERRQ(ierr); 3731 } 3732 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3733 PetscFunctionReturn(0); 3734 } 3735 EXTERN_C_END 3736 3737 #undef __FUNCT__ 3738 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3739 /*@ 3740 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3741 (the default parallel PETSc format). 3742 3743 Collective on MPI_Comm 3744 3745 Input Parameters: 3746 + B - the matrix 3747 . i - the indices into j for the start of each local row (starts with zero) 3748 . j - the column indices for each local row (starts with zero) 3749 - v - optional values in the matrix 3750 3751 Level: developer 3752 3753 Notes: 3754 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3755 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3756 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3757 3758 The i and j indices are 0 based, and the i indices are indices into the local j array. 3759 3760 The format used for the sparse matrix input is equivalent to a 3761 row-major ordering, i.e., for the following matrix, the input data expected is 3762 as shown: 3763 3764 1 0 0 3765 2 0 3 P0 3766 ------- 3767 4 5 6 P1 3768 3769 Process0 [P0]: rows_owned=[0,1] 3770 i = {0,1,3} [size = nrow+1 = 2+1] 3771 j = {0,0,2} [size = nz = 3] 3772 v = {1,2,3} [size = nz = 3] 3773 3774 Process1 [P1]: rows_owned=[2] 3775 i = {0,3} [size = nrow+1 = 1+1] 3776 j = {0,1,2} [size = nz = 3] 3777 v = {4,5,6} [size = nz = 3] 3778 3779 .keywords: matrix, aij, compressed row, sparse, parallel 3780 3781 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3782 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3783 @*/ 3784 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3785 { 3786 PetscErrorCode ierr; 3787 3788 PetscFunctionBegin; 3789 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3790 PetscFunctionReturn(0); 3791 }
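/*
   Editorial usage sketch (not compiled): assembling the 3x3 example from the
   manual page above with MatMPIAIJSetPreallocationCSR() on two processes.
   The array names i0,j0,v0,i1,j1,v1 are illustrative only.

     Mat         A;
     PetscMPIInt rank;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
     PetscScalar v0[] = {1,2,3};                     (rank 0 owns rows 0-1)
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
     PetscScalar v1[] = {4,5,6};                     (rank 1 owns row 2)

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,rank ? 1 : 2,PETSC_DECIDE,3,3);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(A,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);

   After this call the matrix is preallocated, filled, and assembled.
*/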
3792 3793 #undef __FUNCT__ 3794 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3795 /*@C 3796 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3797 (the default parallel PETSc format). For good matrix assembly performance 3798 the user should preallocate the matrix storage by setting the parameters 3799 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3800 performance can be increased by more than a factor of 50. 3801 3802 Collective on MPI_Comm 3803 3804 Input Parameters: 3805 + A - the matrix 3806 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3807 (same value is used for all local rows) 3808 . d_nnz - array containing the number of nonzeros in the various rows of the 3809 DIAGONAL portion of the local submatrix (possibly different for each row) 3810 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3811 The size of this array is equal to the number of local rows, i.e. 'm'. 3812 For matrices that will be factored, you must leave room for (and set) 3813 the diagonal entry even if it is zero. 3814 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3815 submatrix (same value is used for all local rows). 3816 - o_nnz - array containing the number of nonzeros in the various rows of the 3817 OFF-DIAGONAL portion of the local submatrix (possibly different for 3818 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3819 structure. The size of this array is equal to the number 3820 of local rows, i.e. 'm'. 3821 3822 If the *_nnz parameter is given then the *_nz parameter is ignored 3823 3824 The AIJ format (also called the Yale sparse matrix format or 3825 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3826 storage. The stored row and column indices begin with zero. 3827 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3828 3829 The parallel matrix is partitioned such that the first m0 rows belong to 3830 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3831 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 3832 3833 The DIAGONAL portion of the local submatrix of a processor can be defined 3834 as the submatrix which is obtained by extracting the part corresponding to 3835 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3836 first row that belongs to the processor, r2 is the last row belonging to 3837 this processor, and c1-c2 is the range of indices of the local part of a 3838 vector suitable for applying the matrix to. This is an m x n matrix. In the 3839 common case of a square matrix, the row and column ranges are the same and 3840 the DIAGONAL part is also square. The remaining portion of the local 3841 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 3842 3843 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3844 3845 You can call MatGetInfo() to get information on how effective the preallocation was; 3846 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3847 You can also run with the option -info and look for messages with the string 3848 malloc in them to see if additional memory allocation was needed. 3849 3850 Example usage: 3851 3852 Consider the following 8x8 matrix with 34 non-zero values, that is 3853 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3854 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3855 as follows: 3856 3857 .vb 3858 1 2 0 | 0 3 0 | 0 4 3859 Proc0 0 5 6 | 7 0 0 | 8 0 3860 9 0 10 | 11 0 0 | 12 0 3861 ------------------------------------- 3862 13 0 14 | 15 16 17 | 0 0 3863 Proc1 0 18 0 | 19 20 21 | 0 0 3864 0 0 0 | 22 23 0 | 24 0 3865 ------------------------------------- 3866 Proc2 25 26 27 | 0 0 28 | 29 0 3867 30 0 0 | 31 32 33 | 0 34 3868 .ve 3869 3870 This can be represented as a collection of submatrices as: 3871 3872 .vb 3873 A B C 3874 D E F 3875 G H I 3876 .ve 3877 3878 Where the submatrices A,B,C are owned by proc0, D,E,F are 3879 owned by proc1, G,H,I are owned by proc2. 3880 3881 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3882 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3883 The 'M','N' parameters are 8,8, and have the same values on all procs. 3884 3885 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3886 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3887 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3888 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3889 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 3890 matrix, and [DF] as another SeqAIJ matrix. 3891 3892 When d_nz, o_nz parameters are specified, d_nz storage elements are 3893 allocated for every row of the local diagonal submatrix, and o_nz 3894 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3895 One way to choose d_nz and o_nz is to use the max nonzeros per local 3896 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3897 In this case, the values of d_nz,o_nz are: 3898 .vb 3899 proc0 : d_nz = 2, o_nz = 2 3900 proc1 : d_nz = 3, o_nz = 2 3901 proc2 : d_nz = 1, o_nz = 4 3902 .ve 3903 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3904 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3905 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3906 34 values. 3907 3908 When d_nnz, o_nnz parameters are specified, the storage is specified 3909 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3910 In the above case the values for d_nnz,o_nnz are: 3911 .vb 3912 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3913 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3914 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3915 .ve 3916 Here the space allocated is the sum of all the above values, i.e. 34, and 3917 hence pre-allocation is perfect. 3918 3919 Level: intermediate 3920 3921 .keywords: matrix, aij, compressed row, sparse, parallel 3922 3923 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3924 MPIAIJ, MatGetInfo() 3925 @*/ 3926 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3927 { 3928 PetscErrorCode ierr; 3929 3930 PetscFunctionBegin; 3931 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3932 PetscValidType(B,1); 3933 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3934 PetscFunctionReturn(0); 3935 }
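/*
   Editorial usage sketch: preallocating the 8x8 example from the manual page
   above with per-row counts. Here 'm', 'd_nnz', and 'o_nnz' stand for this
   rank's values from the tables worked out in the text, e.g. m=3 and
   d_nnz={2,2,2}, o_nnz={2,2,2} on proc0.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,m,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     (then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual)

   Since d_nnz/o_nnz are given, the scalar d_nz/o_nz arguments (0 here) are ignored.
*/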
3936 3937 #undef __FUNCT__ 3938 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3939 /*@ 3940 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local 3941 rows in standard CSR format. 3942 3943 Collective on MPI_Comm 3944 3945 Input Parameters: 3946 + comm - MPI communicator 3947 . m - number of local rows (Cannot be PETSC_DECIDE) 3948 . n - This value should be the same as the local size used in creating the 3949 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3950 it calculated if N is given) For square matrices n is almost always m. 3951 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3952 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3953 . i - row indices 3954 . j - column indices 3955 - a - matrix values 3956 3957 Output Parameter: 3958 . mat - the matrix 3959 3960 Level: intermediate 3961 3962 Notes: 3963 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3964 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3965 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3966 3967 The i and j indices are 0 based, and the i indices are indices into the local j array. 3968 3969 The format used for the sparse matrix input is equivalent to a 3970 row-major ordering, i.e., for the following matrix, the input data expected is 3971 as shown: 3972 3973 1 0 0 3974 2 0 3 P0 3975 ------- 3976 4 5 6 P1 3977 3978 Process0 [P0]: rows_owned=[0,1] 3979 i = {0,1,3} [size = nrow+1 = 2+1] 3980 j = {0,0,2} [size = nz = 3] 3981 v = {1,2,3} [size = nz = 3] 3982 3983 Process1 [P1]: rows_owned=[2] 3984 i = {0,3} [size = nrow+1 = 1+1] 3985 j = {0,1,2} [size = nz = 3] 3986 v = {4,5,6} [size = nz = 3] 3987 3988 .keywords: matrix, aij, compressed row, sparse, parallel 3989 3990 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3991 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3992 @*/ 3993 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3994 { 3995 PetscErrorCode ierr; 3996 3997 PetscFunctionBegin; 3998 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3999 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4000 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4001 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4002 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4003 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4004 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4005 PetscFunctionReturn(0); 4006 } 4007
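/*
   Editorial usage sketch: rank 0 of the two-process example above creates the
   matrix directly from its local CSR triplet; the global row count is derived
   from the local m, and the local column count from the global N.

     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
*/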
4008 #undef __FUNCT__ 4009 #define __FUNCT__ "MatCreateAIJ" 4010 /*@C 4011 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4012 (the default parallel PETSc format). For good matrix assembly performance 4013 the user should preallocate the matrix storage by setting the parameters 4014 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4015 performance can be increased by more than a factor of 50. 4016 4017 Collective on MPI_Comm 4018 4019 Input Parameters: 4020 + comm - MPI communicator 4021 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 4022 This value should be the same as the local size used in creating the 4023 y vector for the matrix-vector product y = Ax. 4024 . n - This value should be the same as the local size used in creating the 4025 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4026 it calculated if N is given) For square matrices n is almost always m. 4027 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 4028 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 4029 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4030 (same value is used for all local rows) 4031 . d_nnz - array containing the number of nonzeros in the various rows of the 4032 DIAGONAL portion of the local submatrix (possibly different for each row) 4033 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4034 The size of this array is equal to the number of local rows, i.e. 'm'. 4035 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4036 submatrix (same value is used for all local rows). 4037 - o_nnz - array containing the number of nonzeros in the various rows of the 4038 OFF-DIAGONAL portion of the local submatrix (possibly different for 4039 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4040 structure. The size of this array is equal to the number 4041 of local rows, i.e. 'm'. 4042 4043 Output Parameter: 4044 . A - the matrix 4045 4046 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4047 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4048 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4049 4050 Notes: 4051 If the *_nnz parameter is given then the *_nz parameter is ignored 4052 4053 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4054 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4055 storage requirements for this matrix. 4056 4057 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4058 processor then it must be used on all processors that share the object for 4059 that argument. 4060 4061 The user MUST specify either the local or global matrix dimensions 4062 (possibly both). 4063 4064 The parallel matrix is partitioned across processors such that the 4065 first m0 rows belong to process 0, the next m1 rows belong to 4066 process 1, the next m2 rows belong to process 2, etc., where 4067 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores 4068 values corresponding to an [m x N] submatrix. 4069 4070 The columns are logically partitioned with the n0 columns belonging 4071 to the 0th partition, the next n1 columns belonging to the next 4072 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4073 4074 The DIAGONAL portion of the local submatrix on any given processor 4075 is the submatrix corresponding to the rows and columns m,n 4076 corresponding to the given processor, i.e. the diagonal matrix on 4077 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4078 etc. The remaining portion of the local submatrix [m x (N-n)] 4079 constitutes the OFF-DIAGONAL portion. The example below better 4080 illustrates this concept. 4081 4082 For a square global matrix we define each processor's diagonal portion 4083 to be its local rows and the corresponding columns (a square submatrix); 4084 each processor's off-diagonal portion encompasses the remainder of the 4085 local matrix (a rectangular submatrix). 4086 4087 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4088 4089 When calling this routine with a single process communicator, a matrix of 4090 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4091 type of communicator, use the construction mechanism: 4092 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4093 4094 By default, this format uses inodes (identical nodes) when possible. 4095 We search for consecutive rows with the same nonzero structure, thereby 4096 reusing matrix information to achieve increased efficiency. 4097 4098 Options Database Keys: 4099 + -mat_no_inode - Do not use inodes 4100 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4101 - -mat_aij_oneindex - Internally use indexing starting at 1 4102 rather than 0. Note that when calling MatSetValues(), 4103 the user still MUST index entries starting at 0! 4104 4105 4106 Example usage: 4107 4108 Consider the following 8x8 matrix with 34 non-zero values, that is 4109 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 4110 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4111 as follows: 4112 4113 .vb 4114 1 2 0 | 0 3 0 | 0 4 4115 Proc0 0 5 6 | 7 0 0 | 8 0 4116 9 0 10 | 11 0 0 | 12 0 4117 ------------------------------------- 4118 13 0 14 | 15 16 17 | 0 0 4119 Proc1 0 18 0 | 19 20 21 | 0 0 4120 0 0 0 | 22 23 0 | 24 0 4121 ------------------------------------- 4122 Proc2 25 26 27 | 0 0 28 | 29 0 4123 30 0 0 | 31 32 33 | 0 34 4124 .ve 4125 4126 This can be represented as a collection of submatrices as: 4127 4128 .vb 4129 A B C 4130 D E F 4131 G H I 4132 .ve 4133 4134 Where the submatrices A,B,C are owned by proc0, D,E,F are 4135 owned by proc1, G,H,I are owned by proc2. 4136 4137 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4138 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4139 The 'M','N' parameters are 8,8, and have the same values on all procs. 4140 4141 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4142 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4143 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4144 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4145 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 4146 matrix, and [DF] as another SeqAIJ matrix. 4147 4148 When d_nz, o_nz parameters are specified, d_nz storage elements are 4149 allocated for every row of the local diagonal submatrix, and o_nz 4150 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4151 One way to choose d_nz and o_nz is to use the max nonzeros per local 4152 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4153 In this case, the values of d_nz,o_nz are: 4154 .vb 4155 proc0 : d_nz = 2, o_nz = 2 4156 proc1 : d_nz = 3, o_nz = 2 4157 proc2 : d_nz = 1, o_nz = 4 4158 .ve 4159 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4160 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4161 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4162 34 values. 4163 4164 When d_nnz, o_nnz parameters are specified, the storage is specified 4165 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4166 In the above case the values for d_nnz,o_nnz are: 4167 .vb 4168 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4169 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4170 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4171 .ve 4172 Here the space allocated is the sum of all the above values, i.e. 34, and 4173 hence pre-allocation is perfect.
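
   For instance, rank 0 of the example above could make the call as in the
   following sketch (each rank passes its own m, n, and nonzero counts; since
   the per-row arrays are given, the scalar d_nz/o_nz arguments are ignored):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve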
4174 4175 Level: intermediate 4176 4177 .keywords: matrix, aij, compressed row, sparse, parallel 4178 4179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4180 MPIAIJ, MatCreateMPIAIJWithArrays() 4181 @*/ 4182 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4183 { 4184 PetscErrorCode ierr; 4185 PetscMPIInt size; 4186 4187 PetscFunctionBegin; 4188 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4189 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4190 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4191 if (size > 1) { 4192 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4193 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4194 } else { 4195 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4196 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4197 } 4198 PetscFunctionReturn(0); 4199 } 4200 4201 #undef __FUNCT__ 4202 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4203 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4204 { 4205 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4206 4207 PetscFunctionBegin; 4208 *Ad = a->A; 4209 *Ao = a->B; 4210 *colmap = a->garray; 4211 PetscFunctionReturn(0); 4212 } 4213 4214 #undef __FUNCT__ 4215 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4216 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4217 { 4218 PetscErrorCode ierr; 4219 PetscInt i; 4220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4221 4222 PetscFunctionBegin; 4223 if (coloring->ctype == IS_COLORING_GLOBAL) { 4224 ISColoringValue *allcolors,*colors; 4225 ISColoring ocoloring; 4226 4227 /* set coloring for diagonal portion */ 4228 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4229 4230 /* set coloring for off-diagonal portion */ 4231 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4232 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4233 for (i=0; i<a->B->cmap->n; i++) { 4234 colors[i] = allcolors[a->garray[i]]; 4235 } 4236 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4237 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4238 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4239 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4240 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4241 ISColoringValue *colors; 4242 PetscInt *larray; 4243 ISColoring ocoloring; 4244 4245 /* set coloring for diagonal portion */ 4246 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4247 for (i=0; i<a->A->cmap->n; i++) { 4248 larray[i] = i + A->cmap->rstart; 4249 } 4250 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4251 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4252 for (i=0; i<a->A->cmap->n; i++) { 4253 colors[i] = coloring->colors[larray[i]]; 4254 } 4255 ierr = PetscFree(larray);CHKERRQ(ierr); 4256 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4257 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4258 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4259 4260 /* set coloring for off-diagonal portion */ 4261 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4262 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4263 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4264 for (i=0; i<a->B->cmap->n; i++) { 4265 colors[i] = coloring->colors[larray[i]]; 4266 } 4267 ierr = PetscFree(larray);CHKERRQ(ierr); 4268 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4269 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4270 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4271 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4272 4273 PetscFunctionReturn(0); 4274 } 4275 4276 #if defined(PETSC_HAVE_ADIC) 4277 #undef __FUNCT__ 4278 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4279 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4280 { 4281 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4282 PetscErrorCode ierr; 4283 4284 PetscFunctionBegin; 4285 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4286 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4287 PetscFunctionReturn(0); 4288 } 4289 #endif 4290 4291 #undef __FUNCT__ 4292 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4293 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4294 { 4295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4296 PetscErrorCode ierr; 4297 4298 PetscFunctionBegin; 4299 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4300 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4301 PetscFunctionReturn(0); 4302 } 4303 4304 #undef __FUNCT__ 4305 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4306 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4307 { 4308 PetscErrorCode ierr; 4309 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4310 PetscInt *indx; 4311 4312 PetscFunctionBegin; 4313 /* This routine will ONLY return MPIAIJ type matrix */ 4314 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4315 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4316 if (n == PETSC_DECIDE){ 4317 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4318 } 4319 /* Check sum(n) = N */ 4320 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4321 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4322 4323 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4324 rstart -= m; 4325 4326 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4327 for (i=0;i<m;i++) { 4328 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4329 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4330 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4331 } 4332 4333 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4334 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4335 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4336 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4337 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4338 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4339 PetscFunctionReturn(0); 4340 } 4341 4342 #undef __FUNCT__ 4343 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4344 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4345 { 4346 PetscErrorCode ierr; 4347 PetscInt 
m,N,i,rstart,nnz,Ii; 4348 PetscInt *indx; 4349 PetscScalar *values; 4350 4351 PetscFunctionBegin; 4352 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4353 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4354 for (i=0;i<m;i++) { 4355 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4356 Ii = i + rstart; 4357 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4358 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4359 } 4360 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4361 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4362 PetscFunctionReturn(0); 4363 } 4364 4365 #undef __FUNCT__ 4366 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4367 /*@ 4368 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4369 matrices from each processor 4370 4371 Collective on MPI_Comm 4372 4373 Input Parameters: 4374 + comm - the communicator the parallel matrix will live on 4375 . inmat - the input sequential matrix (one per process) 4376 . n - number of local columns (or PETSC_DECIDE) 4377 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4378 4379 Output Parameter: 4380 . outmat - the parallel matrix generated 4381 4382 Level: advanced 4383 4384 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4385 4386 @*/ 4387 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4388 { 4389 PetscErrorCode ierr; 4390 4391 PetscFunctionBegin; 4392 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4393 if (scall == MAT_INITIAL_MATRIX){ 4394 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4395 } 4396 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4397 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4398 PetscFunctionReturn(0); 4399 }
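/*
   Editorial usage sketch (assumes each process already holds a local SeqAIJ
   matrix 'seq', all with the same number of columns):

     Mat C;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);

   The rows of C are the rows of the seq matrices stacked in rank order.
*/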
4400 4401 #undef __FUNCT__ 4402 #define __FUNCT__ "MatFileSplit" 4403 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4404 { 4405 PetscErrorCode ierr; 4406 PetscMPIInt rank; 4407 PetscInt m,N,i,rstart,nnz; 4408 size_t len; 4409 const PetscInt *indx; 4410 PetscViewer out; 4411 char *name; 4412 Mat B; 4413 const PetscScalar *values; 4414 4415 PetscFunctionBegin; 4416 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4417 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4418 /* Should this be the type of the diagonal block of A? */ 4419 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4420 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4421 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4422 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4423 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4424 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4425 for (i=0;i<m;i++) { 4426 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4427 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4428 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4429 } 4430 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4431 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4432 4433 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4434 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4435 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4436 sprintf(name,"%s.%d",outfile,rank); 4437 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4438 ierr = PetscFree(name);CHKERRQ(ierr); 4439 ierr = MatView(B,out);CHKERRQ(ierr); 4440 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4441 ierr = MatDestroy(&B);CHKERRQ(ierr); 4442 PetscFunctionReturn(0); 4443 } 4444 4445 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4446 #undef __FUNCT__ 4447 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4448 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4449 { 4450 PetscErrorCode ierr; 4451 Mat_Merge_SeqsToMPI *merge; 4452 PetscContainer container; 4453 4454 PetscFunctionBegin; 4455 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4456 if (container) { 4457 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4458 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4459 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4460 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4461 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4462 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4463 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4464 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4465 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4466 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4467 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4468 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4469 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4470 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4471 ierr = PetscFree(merge);CHKERRQ(ierr); 4472 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4473 } 4474 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4475 PetscFunctionReturn(0); 4476 } 4477 4478 #include <../src/mat/utils/freespace.h> 4479 #include <petscbt.h> 4480 4481 #undef __FUNCT__ 4482 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4483 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4484 { 4485 PetscErrorCode ierr; 4486 MPI_Comm comm=((PetscObject)mpimat)->comm; 4487 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4488 PetscMPIInt size,rank,taga,*len_s; 4489 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4490 PetscInt proc,m; 4491 PetscInt **buf_ri,**buf_rj; 4492 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4493 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4494 MPI_Request *s_waits,*r_waits; 4495 MPI_Status *status; 4496 MatScalar *aa=a->a; 4497 MatScalar **abuf_r,*ba_i; 4498 Mat_Merge_SeqsToMPI *merge; 4499 PetscContainer container; 4500 4501 PetscFunctionBegin; 4502 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4503 4504 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4505 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4506 4507 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4508 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4509 4510 bi = merge->bi; 4511 bj = merge->bj; 4512 buf_ri = merge->buf_ri; 4513 buf_rj = merge->buf_rj; 4514 4515 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4516 owners = merge->rowmap->range; 4517 len_s = merge->len_s; 4518 4519 /* send and recv matrix values */ 4520 /*-----------------------------*/ 4521 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4522 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4523 4524 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4525 for (proc=0,k=0; proc<size; proc++){ 4526 if (!len_s[proc]) continue; 4527 i = owners[proc]; 4528 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4529 k++; 4530 } 4531 4532 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4533 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4534 ierr = PetscFree(status);CHKERRQ(ierr); 4535 4536 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4537 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4538 4539 /* insert mat values of mpimat */ 4540 /*----------------------------*/ 4541 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4542 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4543 4544 for (k=0; k<merge->nrecv; k++){ 4545 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4546 nrows = *(buf_ri_k[k]); 4547 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4548 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4549 } 4550 4551 /* set values of ba */ 4552 m = merge->rowmap->n; 4553 for (i=0; i<m; i++) { 4554 arow = owners[rank] + i; 4555 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4556 bnzi = bi[i+1] - bi[i]; 4557 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4558 4559 /* add local non-zero vals of this proc's seqmat into ba */ 4560 anzi = ai[arow+1] - ai[arow]; 4561 aj = a->j + ai[arow]; 4562 aa = a->a + ai[arow]; 4563 nextaj = 0; 4564 for (j=0; nextaj<anzi; j++){ 4565 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4566 ba_i[j] += aa[nextaj++]; 4567 } 4568 } 4569 4570 /* add received vals into ba */ 4571 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4572 /* i-th row */ 4573 if (i == *nextrow[k]) { 4574 anzi = *(nextai[k]+1) - *nextai[k]; 4575 aj = buf_rj[k] + *(nextai[k]); 4576 aa = abuf_r[k] + *(nextai[k]); 4577 nextaj = 0; 4578 for (j=0; nextaj<anzi; j++){ 4579 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4580 ba_i[j] += aa[nextaj++]; 4581 } 4582 } 4583 nextrow[k]++; nextai[k]++; 4584 } 4585 } 4586 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4587 } 4588 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4589 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4590 4591 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4592 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4593 ierr =
PetscFree(ba_i);CHKERRQ(ierr); 4594 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4595 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4596 PetscFunctionReturn(0); 4597 } 4598 4599 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4600 4601 #undef __FUNCT__ 4602 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4603 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4604 { 4605 PetscErrorCode ierr; 4606 Mat B_mpi; 4607 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4608 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4609 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4610 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4611 PetscInt len,proc,*dnz,*onz,bs,cbs; 4612 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4613 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4614 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4615 MPI_Status *status; 4616 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4617 PetscBT lnkbt; 4618 Mat_Merge_SeqsToMPI *merge; 4619 PetscContainer container; 4620 4621 PetscFunctionBegin; 4622 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4623 4624 /* make sure it is a PETSc comm */ 4625 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4626 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4627 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4628 4629 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4630 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4631 4632 /* determine row ownership */ 4633 /*---------------------------------------------------------*/ 4634 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4635 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4636 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4637 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4638 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4639 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4640 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4641 4642 m = merge->rowmap->n; 4643 M = merge->rowmap->N; 4644 owners = merge->rowmap->range; 4645 4646 /* determine the number of messages to send, their lengths */ 4647 /*---------------------------------------------------------*/ 4648 len_s = merge->len_s; 4649 4650 len = 0; /* length of buf_si[] */ 4651 merge->nsend = 0; 4652 for (proc=0; proc<size; proc++){ 4653 len_si[proc] = 0; 4654 if (proc == rank){ 4655 len_s[proc] = 0; 4656 } else { 4657 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4658 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */ 4659 } 4660 if (len_s[proc]) { 4661 merge->nsend++; 4662 nrows = 0; 4663 for (i=owners[proc]; i<owners[proc+1]; i++){ 4664 if (ai[i+1] > ai[i]) nrows++; 4665 } 4666 len_si[proc] = 2*(nrows+1); 4667 len += len_si[proc]; 4668 } 4669 } 4670 4671 /* determine the number and length of messages to receive for ij-structure */ 4672 /*-------------------------------------------------------------------------*/ 4673 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4674 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4675 4676 /* post the Irecv of j-structure */ 4677 /*-------------------------------*/ 4678 ierr =
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4679 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4680 4681 /* post the Isend of j-structure */ 4682 /*--------------------------------*/ 4683 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4684 4685 for (proc=0, k=0; proc<size; proc++){ 4686 if (!len_s[proc]) continue; 4687 i = owners[proc]; 4688 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4689 k++; 4690 } 4691 4692 /* receives and sends of j-structure are complete */ 4693 /*------------------------------------------------*/ 4694 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4695 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4696 4697 /* send and recv i-structure */ 4698 /*---------------------------*/ 4699 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4700 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4701 4702 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4703 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4704 for (proc=0,k=0; proc<size; proc++){ 4705 if (!len_s[proc]) continue; 4706 /* form outgoing message for i-structure: 4707 buf_si[0]: nrows to be sent 4708 [1:nrows]: row index (local to the receiving proc) 4709 [nrows+1:2*nrows+1]: i-structure index 4710 */ 4711 /*-------------------------------------------*/ 4712 nrows = len_si[proc]/2 - 1; 4713 buf_si_i = buf_si + nrows+1; 4714 buf_si[0] = nrows; 4715 buf_si_i[0] = 0; 4716 nrows = 0; 4717 for (i=owners[proc]; i<owners[proc+1]; i++){ 4718 anzi = ai[i+1] - ai[i]; 4719 if (anzi) { 4720 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4721 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4722 nrows++; 4723 } 4724 } 4725 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4726 k++; 4727 buf_si += len_si[proc]; 4728 } 4729 4730 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4731 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4732 4733 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4734 for (i=0; i<merge->nrecv; i++){ 4735 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4736 } 4737 4738 ierr = PetscFree(len_si);CHKERRQ(ierr); 4739 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4740 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4741 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4742 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4743 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4744 ierr = PetscFree(status);CHKERRQ(ierr); 4745 4746 /* compute a local seq matrix in each processor */ 4747 /*----------------------------------------------*/ 4748 /* allocate bi array and free space for accumulating nonzero column info */ 4749 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4750 bi[0] = 0; 4751 4752 /* create and initialize a linked list */ 4753 nlnk = N+1; 4754 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4755 4756 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4757 len = 0; 4758 len = ai[owners[rank+1]] - ai[owners[rank]]; 4759 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4760 current_space = free_space; 4761 4762 /* determine
symbolic info for each local row */ 4763 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4764 4765 for (k=0; k<merge->nrecv; k++){ 4766 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4767 nrows = *buf_ri_k[k]; 4768 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4769 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4770 } 4771 4772 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4773 len = 0; 4774 for (i=0;i<m;i++) { 4775 bnzi = 0; 4776 /* add local non-zero cols of this proc's seqmat into lnk */ 4777 arow = owners[rank] + i; 4778 anzi = ai[arow+1] - ai[arow]; 4779 aj = a->j + ai[arow]; 4780 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4781 bnzi += nlnk; 4782 /* add received col data into lnk */ 4783 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4784 if (i == *nextrow[k]) { /* i-th row */ 4785 anzi = *(nextai[k]+1) - *nextai[k]; 4786 aj = buf_rj[k] + *nextai[k]; 4787 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4788 bnzi += nlnk; 4789 nextrow[k]++; nextai[k]++; 4790 } 4791 } 4792 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4793 4794 /* if free space is not available, make more free space */ 4795 if (current_space->local_remaining<bnzi) { 4796 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4797 nspacedouble++; 4798 } 4799 /* copy data into free space, then initialize lnk */ 4800 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4801 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4802 4803 current_space->array += bnzi; 4804 current_space->local_used += bnzi; 4805 current_space->local_remaining -= bnzi; 4806 4807 bi[i+1] = bi[i] + bnzi; 4808 } 4809 4810 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4811 4812 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4813 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4814 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4815 4816 /* create symbolic parallel matrix B_mpi */ 4817 /*---------------------------------------*/ 4818 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4819 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4820 if (n==PETSC_DECIDE) { 4821 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4822 } else { 4823 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4824 } 4825 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4826 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4827 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4828 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4829 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4830 4831 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4832 B_mpi->assembled = PETSC_FALSE; 4833 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4834 merge->bi = bi; 4835 merge->bj = bj; 4836 merge->buf_ri = buf_ri; 4837 merge->buf_rj = buf_rj; 4838 merge->coi = PETSC_NULL; 4839 merge->coj = PETSC_NULL; 4840 merge->owners_co = PETSC_NULL; 4841 4842 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4843 4844 /* attach the supporting struct to B_mpi for reuse */ 4845 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4846 ierr =
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4847 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4848 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4849 *mpimat = B_mpi; 4850 4851 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4852 PetscFunctionReturn(0); 4853 } 4854 4855 #undef __FUNCT__ 4856 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4857 /*@C 4858 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4859 matrices from each processor 4860 4861 Collective on MPI_Comm 4862 4863 Input Parameters: 4864 + comm - the communicator that the parallel matrix will live on 4865 . seqmat - the input sequential matrix on each process 4866 . m - number of local rows (or PETSC_DECIDE) 4867 . n - number of local columns (or PETSC_DECIDE) 4868 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4869 4870 Output Parameter: 4871 . mpimat - the parallel matrix generated 4872 4873 Level: advanced 4874 4875 Notes: 4876 The dimensions of the sequential matrix in each processor MUST be the same. 4877 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4878 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4879 @*/ 4880 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4881 { 4882 PetscErrorCode ierr; 4883 PetscMPIInt size; 4884 4885 PetscFunctionBegin; 4886 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4887 if (size == 1){ 4888 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4889 if (scall == MAT_INITIAL_MATRIX){ 4890 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4891 } else { 4892 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4893 } 4894 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4895 PetscFunctionReturn(0); 4896 } 4897 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4898 if (scall == MAT_INITIAL_MATRIX){ 4899 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4900 } 4901 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4902 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4903 PetscFunctionReturn(0); 4904 } 4905 4906 #undef __FUNCT__ 4907 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4908 /*@ 4909 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4910 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4911 with MatGetSize() 4912 4913 Not Collective 4914 4915 Input Parameters: 4916 + A - the matrix 4917 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4918 4919 Output Parameter: 4920 .
A_loc - the local sequential matrix generated 4921 4922 Level: developer 4923 4924 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4925 4926 @*/ 4927 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4928 { 4929 PetscErrorCode ierr; 4930 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4931 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4932 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4933 MatScalar *aa=a->a,*ba=b->a,*cam; 4934 PetscScalar *ca; 4935 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4936 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4937 PetscBool match; 4938 4939 PetscFunctionBegin; 4940 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4941 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4942 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4943 if (scall == MAT_INITIAL_MATRIX){ 4944 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4945 ci[0] = 0; 4946 for (i=0; i<am; i++){ 4947 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4948 } 4949 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4950 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4951 k = 0; 4952 for (i=0; i<am; i++) { 4953 ncols_o = bi[i+1] - bi[i]; 4954 ncols_d = ai[i+1] - ai[i]; 4955 /* off-diagonal portion of A (columns before the diagonal block) */ 4956 for (jo=0; jo<ncols_o; jo++) { 4957 col = cmap[*bj]; 4958 if (col >= cstart) break; 4959 cj[k] = col; bj++; 4960 ca[k++] = *ba++; 4961 } 4962 /* diagonal portion of A */ 4963 for (j=0; j<ncols_d; j++) { 4964 cj[k] = cstart + *aj++; 4965 ca[k++] = *aa++; 4966 } 4967 /* off-diagonal portion of A (columns after the diagonal block) */ 4968 for (j=jo; j<ncols_o; j++) { 4969 cj[k] = cmap[*bj++]; 4970 ca[k++] = *ba++; 4971 } 4972 } 4973 /* put together the new matrix */ 4974 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4975 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4976 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4977 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4978 mat->free_a = PETSC_TRUE; 4979 mat->free_ij = PETSC_TRUE; 4980 mat->nonew = 0; 4981 } else if (scall == MAT_REUSE_MATRIX){ 4982 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4983 ci = mat->i; cj = mat->j; cam = mat->a; 4984 for (i=0; i<am; i++) { 4985 /* off-diagonal portion of A (columns before the diagonal block) */ 4986 ncols_o = bi[i+1] - bi[i]; 4987 for (jo=0; jo<ncols_o; jo++) { 4988 col = cmap[*bj]; 4989 if (col >= cstart) break; 4990 *cam++ = *ba++; bj++; 4991 } 4992 /* diagonal portion of A */ 4993 ncols_d = ai[i+1] - ai[i]; 4994 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4995 /* off-diagonal portion of A (columns after the diagonal block) */ 4996 for (j=jo; j<ncols_o; j++) { 4997 *cam++ = *ba++; bj++; 4998 } 4999 } 5000 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5001 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5002 PetscFunctionReturn(0); 5003 } 5004 5005 #undef __FUNCT__ 5006 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5007 /*@C 5008 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5009 5010 Not Collective 5011 5012 Input Parameters: 5013 + A - the matrix 5014 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5015 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5016 5017 Output Parameter: 5018 .
A_loc - the local sequential matrix generated 5019 5020 Level: developer 5021 5022 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5023 5024 @*/ 5025 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5026 { 5027 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5028 PetscErrorCode ierr; 5029 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5030 IS isrowa,iscola; 5031 Mat *aloc; 5032 PetscBool match; 5033 5034 PetscFunctionBegin; 5035 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5036 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5037 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5038 if (!row){ 5039 start = A->rmap->rstart; end = A->rmap->rend; 5040 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5041 } else { 5042 isrowa = *row; 5043 } 5044 if (!col){ 5045 start = A->cmap->rstart; 5046 cmap = a->garray; 5047 nzA = a->A->cmap->n; 5048 nzB = a->B->cmap->n; 5049 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5050 ncols = 0; 5051 for (i=0; i<nzB; i++) { 5052 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5053 else break; 5054 } 5055 imark = i; 5056 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5057 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5058 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5059 } else { 5060 iscola = *col; 5061 } 5062 if (scall != MAT_INITIAL_MATRIX){ 5063 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5064 aloc[0] = *A_loc; 5065 } 5066 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5067 *A_loc = aloc[0]; 5068 ierr = PetscFree(aloc);CHKERRQ(ierr); 5069 if (!row){ 5070 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5071 } 5072 if (!col){ 5073 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5074 } 5075 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5076 PetscFunctionReturn(0); 5077 } 5078 5079 #undef __FUNCT__ 5080 #define __FUNCT__ "MatGetBrowsOfAcols" 5081 /*@C 5082 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5083 5084 Collective on Mat 5085 5086 Input Parameters: 5087 + A,B - the matrices in mpiaij format 5088 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5089 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5090 5091 Output Parameter: 5092 + rowb, colb - index sets of rows and columns of B to extract 5093 - B_seq - the sequential matrix generated 5094 5095 Level: developer 5096 5097 @*/ 5098 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5099 { 5100 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5101 PetscErrorCode ierr; 5102 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5103 IS isrowb,iscolb; 5104 Mat *bseq=PETSC_NULL; 5105 5106 PetscFunctionBegin; 5107 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5108 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5109 } 5110 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5111 5112 if (scall == MAT_INITIAL_MATRIX){ 5113 start = A->cmap->rstart; 5114 cmap = a->garray; 5115 nzA = a->A->cmap->n; 5116 nzB = a->B->cmap->n; 5117 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5118 ncols = 0; 5119 for (i=0; i<nzB; i++) { /* row < local row index */ 5120 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5121 else break; 5122 } 5123 imark = i; 5124 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5125 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5126 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5127 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5128 } else { 5129 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5130 isrowb = *rowb; iscolb = *colb; 5131 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5132 bseq[0] = *B_seq; 5133 } 5134 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5135 *B_seq = bseq[0]; 5136 ierr = PetscFree(bseq);CHKERRQ(ierr); 5137 if (!rowb){ 5138 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5139 } else { 5140 *rowb = isrowb; 5141 } 5142 if (!colb){ 5143 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5144 } else { 5145 *colb = iscolb; 5146 } 5147 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5148 PetscFunctionReturn(0); 5149 } 5150 5151 #undef __FUNCT__ 5152 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5153 /* 5154 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5155 of the OFF-DIAGONAL portion of local A 5156 5157 Collective on Mat 5158 5159 Input Parameters: 5160 + A,B - the matrices in mpiaij format 5161 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5162 5163 Output Parameter: 5164 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5165 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5166 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5167 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5168 5169 Level: developer 5170 5171 */ 5172 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5173 { 5174 VecScatter_MPI_General *gen_to,*gen_from; 5175 PetscErrorCode ierr; 5176 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5177 Mat_SeqAIJ *b_oth; 5178 VecScatter ctx=a->Mvctx; 5179 MPI_Comm comm=((PetscObject)ctx)->comm; 5180 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5181 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5182 PetscScalar *rvalues,*svalues; 5183 MatScalar *b_otha,*bufa,*bufA; 5184 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5185 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5186 MPI_Status *sstatus,rstatus; 5187 PetscMPIInt jj; 5188 PetscInt *cols,sbs,rbs; 5189 PetscScalar *vals; 5190 5191 PetscFunctionBegin; 5192 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5193 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5194 } 5195 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5197 5198 gen_to = (VecScatter_MPI_General*)ctx->todata; 5199 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5200 rvalues = gen_from->values; /* holds the length of receiving row */ 5201 svalues = gen_to->values; /* holds the length of sending row */ 5202 nrecvs = gen_from->n; 5203 nsends = gen_to->n; 5204 5205 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5206 srow = gen_to->indices; /* local row index to be sent */ 5207 sstarts = gen_to->starts; 5208 sprocs = gen_to->procs; 5209 sstatus = gen_to->sstatus; 5210 sbs = gen_to->bs; 5211 rstarts = gen_from->starts; 5212 rprocs = gen_from->procs; 5213 rbs = gen_from->bs; 5214 5215 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5216 if (scall == MAT_INITIAL_MATRIX){ 5217 /* i-array */ 5218 /*---------*/ 5219 /* post receives */ 5220 for (i=0; i<nrecvs; i++){ 5221 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5222 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5223 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5224 } 5225 5226 /* pack the outgoing message */ 5227 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5228 sstartsj[0] = 0; rstartsj[0] = 0; 5229 len = 0; /* total length of j or a array to be sent */ 5230 k = 0; 5231 for (i=0; i<nsends; i++){ 5232 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5233 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5234 for (j=0; j<nrows; j++) { 5235 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5236 for (l=0; l<sbs; l++){ 5237 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5238 rowlen[j*sbs+l] = ncols; 5239 len += ncols; 5240 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5241 } 5242 k++; 5243 } 5244 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5245 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5246 } 5247 /*
recvs and sends of i-array are completed */ 5248 i = nrecvs; 5249 while (i--) { 5250 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5251 } 5252 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5253 5254 /* allocate buffers for sending j and a arrays */ 5255 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5256 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5257 5258 /* create i-array of B_oth */ 5259 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5260 b_othi[0] = 0; 5261 len = 0; /* total length of j or a array to be received */ 5262 k = 0; 5263 for (i=0; i<nrecvs; i++){ 5264 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5265 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5266 for (j=0; j<nrows; j++) { 5267 b_othi[k+1] = b_othi[k] + rowlen[j]; 5268 len += rowlen[j]; k++; 5269 } 5270 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5271 } 5272 5273 /* allocate space for j and a arrays of B_oth */ 5274 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5275 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5276 5277 /* j-array */ 5278 /*---------*/ 5279 /* post receives of j-array */ 5280 for (i=0; i<nrecvs; i++){ 5281 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5282 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5283 } 5284 5285 /* pack the outgoing message j-array */ 5286 k = 0; 5287 for (i=0; i<nsends; i++){ 5288 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5289 bufJ = bufj+sstartsj[i]; 5290 for (j=0; j<nrows; j++) { 5291 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5292 for (ll=0; ll<sbs; ll++){ 5293 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5294 for (l=0; l<ncols; l++){ 5295 *bufJ++ = cols[l]; 5296 } 5297 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5298 } 5299 } 5300 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5301 } 5302 5303 /* recvs and sends of j-array are completed */ 5304 i = nrecvs; 5305 while (i--) { 5306 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5307 } 5308 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5309 } else if (scall == MAT_REUSE_MATRIX){ 5310 sstartsj = *startsj_s; 5311 rstartsj = *startsj_r; 5312 bufa = *bufa_ptr; 5313 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5314 b_otha = b_oth->a; 5315 } else { 5316 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse %d",(int)scall); 5317 } 5318 5319 /* a-array */ 5320 /*---------*/ 5321 /* post receives of a-array */ 5322 for (i=0; i<nrecvs; i++){ 5323 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5324 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5325 } 5326 5327 /* pack the outgoing message a-array */ 5328 k = 0; 5329 for (i=0; i<nsends; i++){ 5330 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5331 bufA = bufa+sstartsj[i]; 5332 for (j=0; j<nrows; j++) { 5333 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5334 for (ll=0; ll<sbs; ll++){ 5335 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5336 for (l=0; l<ncols; l++){ 5337 *bufA++ = vals[l]; 5338 } 5339 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5340 } 5341 } 5342 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5343 } 5344 /* recvs and sends of a-array are completed */ 5345 i = nrecvs; 5346 while (i--) { 5347 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5348 } 5349 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5350 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5351 5352 if (scall == MAT_INITIAL_MATRIX){ 5353 /* put together the new matrix */ 5354 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5355 5356 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5357 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5358 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5359 b_oth->free_a = PETSC_TRUE; 5360 b_oth->free_ij = PETSC_TRUE; 5361 b_oth->nonew = 0; 5362 5363 ierr = PetscFree(bufj);CHKERRQ(ierr); 5364 if (!startsj_s || !bufa_ptr){ 5365 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5366 ierr = PetscFree(bufa);CHKERRQ(ierr); 5367 } else { 5368 *startsj_s = sstartsj; 5369 *startsj_r = rstartsj; 5370 *bufa_ptr = bufa; 5371 } 5372 } 5373 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5374 PetscFunctionReturn(0); 5375 } 5376 5377 #undef __FUNCT__ 5378 #define __FUNCT__ "MatGetCommunicationStructs" 5379 /*@C 5380 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5381 5382 Not Collective 5383 5384 Input Parameters: 5385 . A - The matrix in mpiaij format 5386 5387 Output Parameter: 5388 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5389 .
colmap - A map from global column index to local index into lvec 5390 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5391 5392 Level: developer 5393 5394 @*/ 5395 #if defined (PETSC_USE_CTABLE) 5396 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5397 #else 5398 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5399 #endif 5400 { 5401 Mat_MPIAIJ *a; 5402 5403 PetscFunctionBegin; 5404 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5405 PetscValidPointer(lvec, 2); 5406 PetscValidPointer(colmap, 3); 5407 PetscValidPointer(multScatter, 4); 5408 a = (Mat_MPIAIJ *) A->data; 5409 if (lvec) *lvec = a->lvec; 5410 if (colmap) *colmap = a->colmap; 5411 if (multScatter) *multScatter = a->Mvctx; 5412 PetscFunctionReturn(0); 5413 } 5414 5415 EXTERN_C_BEGIN 5416 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5417 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5418 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5419 EXTERN_C_END 5420 5421 #undef __FUNCT__ 5422 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5423 /* 5424 Computes (B'*A')' since computing B*A directly is untenable 5425 5426 n p p 5427 ( ) ( ) ( ) 5428 m ( A ) * n ( B ) = m ( C ) 5429 ( ) ( ) ( ) 5430 5431 */ 5432 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5433 { 5434 PetscErrorCode ierr; 5435 Mat At,Bt,Ct; 5436 5437 PetscFunctionBegin; 5438 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5439 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5440 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5441 ierr = MatDestroy(&At);CHKERRQ(ierr); 5442 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5443 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5444 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5445 PetscFunctionReturn(0); 5446 } 5447 5448 #undef __FUNCT__ 5449 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5450 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5451 { 5452 PetscErrorCode ierr; 5453 PetscInt m=A->rmap->n,n=B->cmap->n; 5454 Mat Cmat; 5455 5456 PetscFunctionBegin; 5457 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5458 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5459 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5460 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5461 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5462 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5463 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5464 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5465 *C = Cmat; 5466 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5467 PetscFunctionReturn(0); 5468 } 5469 5470 /* ----------------------------------------------------------------*/ 5471 #undef __FUNCT__ 5472 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5473 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5474 { 5475 PetscErrorCode ierr; 5476 5477 PetscFunctionBegin; 5478 if (scall == MAT_INITIAL_MATRIX){ 5479 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5480 } 5481 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
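/* The symbolic phase above runs only for MAT_INITIAL_MATRIX; with MAT_REUSE_MATRIX the C created by an earlier call is simply refilled by the numeric phase. */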
5482 PetscFunctionReturn(0); 5483 } 5484 5485 EXTERN_C_BEGIN 5486 #if defined(PETSC_HAVE_MUMPS) 5487 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5488 #endif 5489 #if defined(PETSC_HAVE_PASTIX) 5490 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5491 #endif 5492 #if defined(PETSC_HAVE_SUPERLU_DIST) 5493 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5494 #endif 5495 #if defined(PETSC_HAVE_SPOOLES) 5496 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5497 #endif 5498 EXTERN_C_END 5499 5500 /*MC 5501 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5502 5503 Options Database Keys: 5504 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5505 5506 Level: beginner 5507 5508 .seealso: MatCreateAIJ() 5509 M*/ 5510 5511 EXTERN_C_BEGIN 5512 #undef __FUNCT__ 5513 #define __FUNCT__ "MatCreate_MPIAIJ" 5514 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5515 { 5516 Mat_MPIAIJ *b; 5517 PetscErrorCode ierr; 5518 PetscMPIInt size; 5519 5520 PetscFunctionBegin; 5521 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5522 5523 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5524 B->data = (void*)b; 5525 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5526 B->assembled = PETSC_FALSE; 5527 5528 B->insertmode = NOT_SET_VALUES; 5529 b->size = size; 5530 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5531 5532 /* build cache for off array entries formed */ 5533 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5534 b->donotstash = PETSC_FALSE; 5535 b->colmap = 0; 5536 b->garray = 0; 5537 b->roworiented = PETSC_TRUE; 5538 5539 /* stuff used for matrix vector multiply */ 5540 b->lvec = PETSC_NULL; 5541 b->Mvctx = PETSC_NULL; 5542 5543 /* stuff for MatGetRow() */ 5544 b->rowindices = 0; 5545 b->rowvalues = 0; 5546 b->getrowactive = PETSC_FALSE; 5547 5548 #if defined(PETSC_HAVE_SPOOLES) 5549 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5550 "MatGetFactor_mpiaij_spooles", 5551 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5552 #endif 5553 #if defined(PETSC_HAVE_MUMPS) 5554 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5555 "MatGetFactor_aij_mumps", 5556 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5557 #endif 5558 #if defined(PETSC_HAVE_PASTIX) 5559 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5560 "MatGetFactor_mpiaij_pastix", 5561 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5562 #endif 5563 #if defined(PETSC_HAVE_SUPERLU_DIST) 5564 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5565 "MatGetFactor_mpiaij_superlu_dist", 5566 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5567 #endif 5568 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5569 "MatStoreValues_MPIAIJ", 5570 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5571 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5572 "MatRetrieveValues_MPIAIJ", 5573 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5574 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5575 "MatGetDiagonalBlock_MPIAIJ", 5576 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5577 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5578 "MatIsTranspose_MPIAIJ", 5579 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5580 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5581 "MatMPIAIJSetPreallocation_MPIAIJ", 5582 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5583 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5584 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5585 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5586 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5587 "MatDiagonalScaleLocal_MPIAIJ", 5588 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5589 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5590 "MatConvert_MPIAIJ_MPIAIJPERM", 5591 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5592 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5593 "MatConvert_MPIAIJ_MPIAIJCRL", 5594 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5595 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5596 "MatConvert_MPIAIJ_MPISBAIJ", 5597 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5598 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5599 "MatMatMult_MPIDense_MPIAIJ", 5600 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5602 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5603 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5605 "MatMatMultNumeric_MPIDense_MPIAIJ", 5606 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5608 PetscFunctionReturn(0); 5609 } 5610 EXTERN_C_END 5611 5612 #undef __FUNCT__ 5613 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5614 /*@ 5615 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5616 and "off-diagonal" part of the matrix in CSR format. 5617 5618 Collective on MPI_Comm 5619 5620 Input Parameters: 5621 + comm - MPI communicator 5622 . m - number of local rows (Cannot be PETSC_DECIDE) 5623 . n - This value should be the same as the local size used in creating the 5624 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 5625 it calculated if N is given). For square matrices n is almost always m. 5626 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5627 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5628 . i - row indices for "diagonal" portion of matrix 5629 . j - column indices 5630 . a - matrix values 5631 . oi - row indices for "off-diagonal" portion of matrix 5632 . oj - column indices 5633 - oa - matrix values 5634 5635 Output Parameter: 5636 . mat - the matrix 5637 5638 Level: advanced 5639 5640 Notes: 5641 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5642 must free the arrays once the matrix has been destroyed and not before. 5643 5644 The i and j indices are 0-based 5645 5646 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5647 5648 This sets local rows and cannot be used to set off-processor values. 5649 5650 You cannot later use MatSetValues() to change values in this matrix.
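   Example usage, a minimal sketch: the CSR arrays i,j,a (diagonal block) and oi,oj,oa (off-diagonal block) and the local sizes m,n are placeholders assumed to have been built by the caller; none of them are defined in this file.

      Mat C;
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&C);CHKERRQ(ierr);
      /* ... use C; do not call MatSetValues() on it (see Notes above) ... */
      ierr = MatDestroy(&C);CHKERRQ(ierr);
      /* only now may the caller free i,j,a,oi,oj,oa */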
5651 5652 .keywords: matrix, aij, compressed row, sparse, parallel 5653 5654 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5655 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5656 @*/ 5657 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5658 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5659 { 5660 PetscErrorCode ierr; 5661 Mat_MPIAIJ *maij; 5662 5663 PetscFunctionBegin; 5664 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5665 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5666 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5667 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5668 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5669 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5670 maij = (Mat_MPIAIJ*) (*mat)->data; 5671 maij->donotstash = PETSC_TRUE; 5672 (*mat)->preallocated = PETSC_TRUE; 5673 5674 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5675 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5676 5677 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5678 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5679 5680 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5681 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5682 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5683 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5684 5685 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5686 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5687 PetscFunctionReturn(0); 5688 } 5689 5690 /* 5691 Special version for direct calls from Fortran 5692 */ 5693 #include <petsc-private/fortranimpl.h> 5694 5695 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5696 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5697 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5698 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5699 #endif 5700 5701 /* Change these macros so they can be used in a void function, which cannot return an error code; on error they abort instead */ 5702 #undef CHKERRQ 5703 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5704 #undef SETERRQ2 5705 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5706 #undef SETERRQ3 5707 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5708 #undef SETERRQ 5709 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5710 5711 EXTERN_C_BEGIN 5712 #undef __FUNCT__ 5713 #define __FUNCT__ "matsetvaluesmpiaij_" 5714 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5715 { 5716 Mat mat = *mmat; 5717 PetscInt m = *mm, n = *mn; 5718 InsertMode addv = *maddv; 5719 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5720 PetscScalar value; 5721 PetscErrorCode ierr; 5722 5723 MatCheckPreallocated(mat,1); 5724 if (mat->insertmode == NOT_SET_VALUES) { 5725 mat->insertmode = addv; 5726 } 5727 #if defined(PETSC_USE_DEBUG) 5728 else if (mat->insertmode != addv) { 5729 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5730 } 5731 #endif 5732 { 5733 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5734 PetscInt cstart =
mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5735 PetscBool roworiented = aij->roworiented; 5736 5737 /* Some Variables required in the macro */ 5738 Mat A = aij->A; 5739 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5740 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5741 MatScalar *aa = a->a; 5742 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5743 Mat B = aij->B; 5744 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5745 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5746 MatScalar *ba = b->a; 5747 5748 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5749 PetscInt nonew = a->nonew; 5750 MatScalar *ap1,*ap2; 5751 5752 PetscFunctionBegin; 5753 for (i=0; i<m; i++) { 5754 if (im[i] < 0) continue; 5755 #if defined(PETSC_USE_DEBUG) 5756 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5757 #endif 5758 if (im[i] >= rstart && im[i] < rend) { 5759 row = im[i] - rstart; 5760 lastcol1 = -1; 5761 rp1 = aj + ai[row]; 5762 ap1 = aa + ai[row]; 5763 rmax1 = aimax[row]; 5764 nrow1 = ailen[row]; 5765 low1 = 0; 5766 high1 = nrow1; 5767 lastcol2 = -1; 5768 rp2 = bj + bi[row]; 5769 ap2 = ba + bi[row]; 5770 rmax2 = bimax[row]; 5771 nrow2 = bilen[row]; 5772 low2 = 0; 5773 high2 = nrow2; 5774 5775 for (j=0; j<n; j++) { 5776 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5777 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5778 if (in[j] >= cstart && in[j] < cend){ 5779 col = in[j] - cstart; 5780 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5781 } else if (in[j] < 0) continue; 5782 #if defined(PETSC_USE_DEBUG) 5783 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5784 #endif 5785 else { 5786 if (mat->was_assembled) { 5787 if (!aij->colmap) { 5788 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5789 } 5790 #if defined (PETSC_USE_CTABLE) 5791 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5792 col--; 5793 #else 5794 col = aij->colmap[in[j]] - 1; 5795 #endif 5796 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5797 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5798 col = in[j]; 5799 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5800 B = aij->B; 5801 b = (Mat_SeqAIJ*)B->data; 5802 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5803 rp2 = bj + bi[row]; 5804 ap2 = ba + bi[row]; 5805 rmax2 = bimax[row]; 5806 nrow2 = bilen[row]; 5807 low2 = 0; 5808 high2 = nrow2; 5809 bm = aij->B->rmap->n; 5810 ba = b->a; 5811 } 5812 } else col = in[j]; 5813 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5814 } 5815 } 5816 } else { 5817 if (!aij->donotstash) { 5818 if (roworiented) { 5819 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5820 } else { 5821 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5822 } 5823 } 5824 } 5825 }} 5826 PetscFunctionReturnVoid(); 5827 } 5828 EXTERN_C_END 5829 5830
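/* Note: matsetvaluesmpiaij_() above duplicates the insertion logic of MatSetValues_MPIAIJ() so that Fortran code can call it directly, skipping the MatSetValues() function-pointer dispatch; because it returns void and cannot propagate error codes, the error macros were redefined above to aborting CHKERRABORT variants. */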