#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
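
/*
   Usage sketch (illustrative only, not compiled here): creating an AIJ matrix and, as the
   manual page above recommends, calling both preallocation routines so the same code works
   on one or many processes.  The sizes and nonzero estimates below are made-up placeholders.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,10,PETSC_NULL);               -- used when the communicator has one process
     MatMPIAIJSetPreallocation(A,10,PETSC_NULL,5,PETSC_NULL);  -- used otherwise
     ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
     MatDestroy(&A);
*/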
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
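
/*
   Usage sketch (illustrative only): computing the 2-norm of every global column with the
   public entry point implemented above.  The caller provides one entry per global column;
   after the call every process holds all n norms (note the Allreduce above).

     Mat       A;                 -- an assembled MATAIJ matrix
     PetscInt  N;
     PetscReal *norms;
     MatGetSize(A,PETSC_NULL,&N);
     PetscMalloc(N*sizeof(PetscReal),&norms);
     MatGetColumnNorms(A,NORM_2,norms);
     ...
     PetscFree(norms);
*/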
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
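
/*
   Usage sketch (illustrative only; this is an internal routine, so the calling pattern is an
   assumption drawn from the code above): spreading a serial matrix held on process 0 across a
   communicator.  gmat is only examined on rank 0; mlocal is the number of rows this process
   should own.  All variable names are placeholders.

     Mat gmat = PETSC_NULL,mat;
     if (!rank) {
       ... build or load a square MATSEQAIJ matrix gmat ...
     }
     MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&mat);
     ... later, to refresh only the numerical values from rank 0 ...
     MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&mat);
*/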
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
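
/*
   Lookup sketch (illustrative only): both storage variants above shift entries by one so that
   the value 0 can mean "global column gcol is not present in the off-diagonal part".  This is
   exactly the pattern used by MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below; gcol and
   lcol are placeholder names.

     PetscInt lcol;                          -- local column in B, or negative if absent
   #if defined (PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/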
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
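
/*
   Worked illustration (hedged) of the insertion path of the macros above: suppose a row of A
   currently holds column indices rp1 = {2,5,9} and values ap1 = {a,b,c} with nrow1 = 3.
   Setting (row,7,v) finds no match, so after the search narrows the window the tail of the row
   is shifted up one slot and the entry lands in sorted position:

     rp1 = {2,5,7,9},  ap1 = {a,b,v,c},  nrow1 = 4,  ailen[row] = 4

   With ADD_VALUES and an existing column (say col = 5) no shift happens; the macro simply does
   ap1[1] += v and jumps to a_noinsert.
*/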
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
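
/*
   Layout illustration (hedged, derived from the three PetscMemcpy() calls above): the input
   array v must hold the values of the entire row in global column order, i.e. off-diagonal
   entries left of the owned column block, then the diagonal-block entries, then off-diagonal
   entries to the right.  For example, if this process owns columns 4..7 and the row has
   nonzeros in global columns {1,4,6,9}, then

     v = { a(row,1), a(row,4), a(row,6), a(row,9) }

   and the copies above place v[0] into B, v[1..2] into A, and v[3] into B.
*/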
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
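
/*
   Usage sketch (illustrative only): MatGetValues() on an MPIAIJ matrix may only query rows
   owned by the calling process (the routine above errors otherwise), so a safe pattern
   restricts the requested rows to the ownership range.  row and col are placeholders.

     PetscInt    rstart,rend,row,col;
     PetscScalar val;
     MatGetOwnershipRange(mat,&rstart,&rend);
     if (row >= rstart && row < rend) {
       MatGetValues(mat,1,&row,1,&col,&val);
     }
*/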
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
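
/*
   Usage sketch (illustrative only): because MatAssemblyBegin() above only starts the stash
   communication and MatAssemblyEnd() completes it, application code can overlap its own work
   with the message traffic:

     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     ... unrelated local computation here overlaps the stash scatter ...
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/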
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /*
     determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.

     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
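
/*
   Usage sketch (illustrative only): MatZeroEntries() zeroes the numerical values of both local
   blocks while keeping the preallocated nonzero structure, so a matrix can be refilled cheaply
   inside a loop, e.g. over Newton steps:

     MatZeroEntries(A);
     ... MatSetValues(A,...,ADD_VALUES) for the new entries ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/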
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart = A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows.  If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, triggering many mallocs and slowing
     things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
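
/*
   Usage sketch (illustrative only): a common use of MatZeroRows() is imposing Dirichlet
   boundary conditions.  Passing the solution vector x and right-hand side b makes the routine
   also set b[row] = diag*x[row] for each zeroed row, matching the "fix right hand side" block
   above.  The row list below is a made-up placeholder; any process may list any global row.

     PetscInt rows[] = {0,7,42};
     MatZeroRows(A,3,rows,1.0,x,b);
*/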
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
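
/*
   Usage sketch (illustrative only): MatZeroRowsColumns() additionally zeroes the corresponding
   columns, which preserves symmetry; as the code above shows, the eliminated column entries are
   folded into b (bb[i] -= a_ij * x_j) before being set to zero.  nrows/rows are as in the
   MatZeroRows() sketch above.

     MatZeroRowsColumns(A,nrows,rows,1.0,x,b);
*/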
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
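
/*
   Note on the structure of MatMult_MPIAIJ() above (restatement, no extra code): with x_loc the
   locally owned part of x and x_ghost the scattered ghost values in a->lvec, the product is

     y = A_diag * x_loc + A_offdiag * x_ghost

   and the VecScatterBegin()/VecScatterEnd() pair brackets the local multiply so that the ghost
   communication overlaps the computation of A_diag * x_loc.
*/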
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
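
/*
   Usage sketch (illustrative only): MatIsTranspose() is the public entry point implemented
   above for MPIAIJ; tol bounds the permitted elementwise difference between A and B^T.

     PetscBool flg;
     MatIsTranspose(A,B,1.e-10,&flg);
     if (flg) { ... A and B are transposes of each other to within the tolerance ... }
*/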
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
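    /*
       Note: aij->B stores its column indices in a compressed local numbering;
       aij->garray[] translates a local off-diagonal column index back to the
       global column number.  A sketch of the two mappings used throughout
       this file, for a rank owning global columns [cstart,cend):

          global col of B(i,k) = aij->garray[Bloc->j[k]]
          global col of A(i,k) = cstart + Aloc->j[k]
    */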
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
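    /*
       The forward and backward branches below repeat this same pattern:
       scatter the current solution into the ghost vector, form the
       locally-updated right-hand side bb1 = bb - B*x_ghost, then run the
       requested sweep on the diagonal block A only.  This is a block-Jacobi
       outer iteration with (S)SOR as the local solver; off-process coupling
       is refreshed once per outer iteration, not once per sweep.
    */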
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm;
  PetscInt       first,local_rowsize,local_colsize;
  const PetscInt *rows;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
  ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  ierr = ISDestroy(&crowp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  ierr = ISDestroy(&growp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(&irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(&icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
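  /*
     Most options are simply forwarded to both local blocks: a->A holds the
     diagonal (on-process) part and a->B the off-diagonal part, so a per-block
     option must be set on each for the parallel matrix to behave uniformly.
  */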
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
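    /*
       MatGetRow() must return one contiguous, column-sorted row, but the row
       lives in two pieces: the diagonal block A (global columns
       cstart..cend-1) and the off-diagonal block B (columns mapped through
       garray).  The merge further down splices B-entries with global column
       < cstart, then all of A, then the remaining B-entries, so the scratch
       space here is sized by the longest combined row.
    */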
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data,*Bloc = (Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
  }
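  /*
     A sketch of the transpose strategy: rows of A^T are columns of A, so
     d_nnz[] is built by counting how often each local column appears in the
     diagonal block; the off-diagonal count is merely approximated by the same
     d_nnz (with MAT_NEW_NONZERO_ALLOCATION_ERR disabled above) so that no
     extra communication round is needed before the MatSetValues() loops
     below, which insert each local row of A as a column of B.
  */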
  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */

  /* copy over the B part */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts.  If need be, we could provide a copy more
       efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIAIJ"
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt       i,m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
  const PetscInt *xi = x->i,*yi = y->i;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
    const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                   /* Point in X */
      for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++;   /* Catch up to X */
      if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;               /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one = 1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ*)xx->A->data;
    y = (Mat_SeqAIJ*)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
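    /*
       y->xtoy[i] gives, for the i-th stored nonzero of X's off-diagonal
       block, the position of the matching entry in Y's value array, with the
       two column sets compared in global numbering via the two garrays.
       Caching the map (keyed on y->XtoY) turns repeated Y += a*X updates
       with the same pair of matrices into the single indexed pass below.
    */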
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity,col_identity;
  PetscContainer c;
  PetscInt       m,n,M,N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type  *lgraph_p = new lgraph_type(petsc::num_global_vertices(A),pg,petsc::matrix_distribution(A,pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A,graph,get(boost::edge_weight,graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* attach the factored graph to the new matrix so MatSolve_MPIAIJ() can retrieve it */
  ierr = PetscContainerCreate(((PetscObject)A)->comm,&c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c,lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)(fact),"graph",(PetscObject)c);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A,Vec b,Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type    *lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"graph",(PetscObject*)&c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c,(void**)&lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b,x);CHKERRQ(ierr);

  PetscScalar *array_x;
  ierr = VecGetArray(x,&array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x,&sx);CHKERRQ(ierr);

  PetscScalar *array_b;
  ierr = VecGetArray(b,&array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b,&sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar,1> array_ref_type;
  array_ref_type ref_b(array_b,boost::extents[num_vertices(graph)]),
                 ref_x(array_x,boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
                                       boost::property_map<graph_dist::ilu_default::graph_type,boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(),get(boost::vertex_index,graph)),
               vector_x(ref_x.begin(),get(boost::vertex_index,graph));

  ilu_set_solve(*lgraph_p,vector_b,vector_x);
  PetscFunctionReturn(0);
}
#endif

typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;
  PetscMPIInt    *send_rank,*recv_rank;
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
  PetscScalar    *sbuf_a,**rbuf_a;
  PetscErrorCode (*Destroy)(Mat);
} Mat_Redundant;

#undef __FUNCT__
#define __FUNCT__ "PetscContainerDestroy_MatRedundant"
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund = (Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++) {
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund = PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
  ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
  A->ops->destroy = redund->Destroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  if (A->ops->destroy) {
    ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends = 0,nrecvs = 0,i,rownz_max = 0;
  PetscMPIInt    *send_rank = PETSC_NULL,*recv_rank = PETSC_NULL;
  PetscInt       *rowrange = mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A = aij->A,B = aij->B,C = *matredundant;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal = a->nz + b->nz;
  PetscInt       j,cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart = mat->rmap->rstart,rend = mat->rmap->rend,*bmap = aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1 = PETSC_NULL,*s_waits2 = PETSC_NULL,*s_waits3 = PETSC_NULL,
                 *r_waits1 = PETSC_NULL,*r_waits2 = PETSC_NULL,*r_waits3 = PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz = PETSC_NULL,*rbuf_nz = PETSC_NULL,count;
  PetscInt       **rbuf_j = PETSC_NULL;
  PetscScalar    **rbuf_a = PETSC_NULL;
  Mat_Redundant  *redund = PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
    if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

    nsends    = redund->nsends;
    nrecvs    = redund->nrecvs;
    send_rank = redund->send_rank;
    recv_rank = redund->recv_rank;
    sbuf_nz   = redund->sbuf_nz;
    rbuf_nz   = redund->rbuf_nz;
    sbuf_j    = redund->sbuf_j;
    sbuf_a    = redund->sbuf_a;
    rbuf_j    = redund->rbuf_j;
    rbuf_a    = redund->rbuf_a;
  }

  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMPIInt subrank,subsize;
    PetscInt    nleftover,np_subcomm;
    /* get the destination processors' id send_rank, nsends and nrecvs */
    ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
    ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
    np_subcomm = size/nsubcomm;
    nleftover  = size - nsubcomm*np_subcomm;
    nsends = 0; nrecvs = 0;
    for (i=0; i<size; i++) { /* i = rank */
      if (subrank == i/nsubcomm && rank != i) { /* my subrank == other's subrank */
        send_rank[nsends]   = i; nsends++;
        recv_rank[nrecvs++] = i;
      }
    }
    if (rank >= size - nleftover) { /* this proc is a leftover processor */
      i = size-nleftover-1;
      j = 0;
      while (j < nsubcomm - nleftover) {
        send_rank[nsends++] = i;
        i--; j++;
      }
    }

    if (nleftover && subsize == size/nsubcomm && subrank == subsize-1) { /* this proc recvs from leftover processors */
      for (i=0; i<nleftover; i++) {
        recv_rank[nrecvs++] = size-nleftover+i;
      }
    }

    /* allocate sbuf_j, sbuf_a */
    i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
    ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
    ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers */
  if (reuse == MAT_INITIAL_MATRIX) {
    rownz_max = 0;
    rptr = sbuf_j;
    cols = sbuf_j + rend-rstart + 1;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++) {
      row   = i + rstart;
      nzA   = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) {
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++) {
        vals[lwrite]   = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) {
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
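    /*
       At this point the send buffers hold one self-describing CSR block:
       sbuf_j = [ row pointers (rend-rstart+1 entries) | column indices (nzlocal entries) ]
       with sbuf_a holding the matching nzlocal values; each row is packed as
       lower-B, A, upper-B entries so the global column numbers come out sorted.
    */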
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr    = sbuf_j;
    vals    = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++) {
      row   = i + rstart;
      nzA   = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv other's nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);

    /* post receives of other's nzlocal */
    for (i=0; i<nrecvs; i++) {
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++) {
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from other's */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++) {
      j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++) {
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++) {
      j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++) {
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
    ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++) {
    row   = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++) {
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    rptr   = rbuf_j[imdex];
    cols   = rbuf_j[imdex] + rend-rstart + 1;
    vals   = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++) {
      row   = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->recv_rank = recv_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->Destroy = C->ops->destroy;
    C->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, which is also the length of v and idx */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx,*offdiagIdx;
  Vec            diagV,offdiagV;
  PetscScalar    *a,*diagA,*offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  /* the work vectors are sequential, so they must live on PETSC_COMM_SELF */
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A,diagV,diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B,offdiagV,offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  for (r=0; r<n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v,&a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx,offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx,*offdiagIdx;
  Vec            diagV,offdiagV;
  PetscScalar    *a,*diagA,*offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A,diagV,diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B,offdiagV,offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  for (r=0; r<n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v,&a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx,offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
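/*
   Both row-extrema routines above follow the same recipe: compute the
   extremum separately on the diagonal block (columns cstart..cend-1) and on
   the off-diagonal block (columns translated through garray), then merge the
   two candidates row by row.  A hedged sketch of typical user-level usage,
   assuming an assembled MPIAIJ matrix A with m local rows:

      Vec      rmax;
      PetscInt *loc;
      ierr = MatGetVecs(A,PETSC_NULL,&rmax);CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
      ierr = MatGetRowMax(A,rmax,loc);CHKERRQ(ierr);
      ...  loc[i] now holds the global column of row i's maximum  ...
*/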
{ 2988 PetscErrorCode ierr; 2989 Mat *dummy; 2990 2991 PetscFunctionBegin; 2992 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2993 *newmat = *dummy; 2994 ierr = PetscFree(dummy);CHKERRQ(ierr); 2995 PetscFunctionReturn(0); 2996 } 2997 2998 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2999 3000 #undef __FUNCT__ 3001 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3002 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3003 { 3004 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3005 PetscErrorCode ierr; 3006 3007 PetscFunctionBegin; 3008 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3009 PetscFunctionReturn(0); 3010 } 3011 3012 3013 /* -------------------------------------------------------------------*/ 3014 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3015 MatGetRow_MPIAIJ, 3016 MatRestoreRow_MPIAIJ, 3017 MatMult_MPIAIJ, 3018 /* 4*/ MatMultAdd_MPIAIJ, 3019 MatMultTranspose_MPIAIJ, 3020 MatMultTransposeAdd_MPIAIJ, 3021 #ifdef PETSC_HAVE_PBGL 3022 MatSolve_MPIAIJ, 3023 #else 3024 0, 3025 #endif 3026 0, 3027 0, 3028 /*10*/ 0, 3029 0, 3030 0, 3031 MatSOR_MPIAIJ, 3032 MatTranspose_MPIAIJ, 3033 /*15*/ MatGetInfo_MPIAIJ, 3034 MatEqual_MPIAIJ, 3035 MatGetDiagonal_MPIAIJ, 3036 MatDiagonalScale_MPIAIJ, 3037 MatNorm_MPIAIJ, 3038 /*20*/ MatAssemblyBegin_MPIAIJ, 3039 MatAssemblyEnd_MPIAIJ, 3040 MatSetOption_MPIAIJ, 3041 MatZeroEntries_MPIAIJ, 3042 /*24*/ MatZeroRows_MPIAIJ, 3043 0, 3044 #ifdef PETSC_HAVE_PBGL 3045 0, 3046 #else 3047 0, 3048 #endif 3049 0, 3050 0, 3051 /*29*/ MatSetUp_MPIAIJ, 3052 #ifdef PETSC_HAVE_PBGL 3053 0, 3054 #else 3055 0, 3056 #endif 3057 0, 3058 0, 3059 0, 3060 /*34*/ MatDuplicate_MPIAIJ, 3061 0, 3062 0, 3063 0, 3064 0, 3065 /*39*/ MatAXPY_MPIAIJ, 3066 MatGetSubMatrices_MPIAIJ, 3067 MatIncreaseOverlap_MPIAIJ, 3068 MatGetValues_MPIAIJ, 3069 MatCopy_MPIAIJ, 3070 /*44*/ MatGetRowMax_MPIAIJ, 3071 MatScale_MPIAIJ, 3072 0, 3073 0, 3074 MatZeroRowsColumns_MPIAIJ, 3075 /*49*/ 0, 3076 0, 3077 0, 3078 0, 3079 0, 3080 /*54*/ MatFDColoringCreate_MPIAIJ, 3081 0, 3082 MatSetUnfactored_MPIAIJ, 3083 0, /* MatPermute_MPIAIJ, impl currently broken */ 3084 0, 3085 /*59*/ MatGetSubMatrix_MPIAIJ, 3086 MatDestroy_MPIAIJ, 3087 MatView_MPIAIJ, 3088 0, 3089 0, 3090 /*64*/ 0, 3091 0, 3092 0, 3093 0, 3094 0, 3095 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3096 MatGetRowMinAbs_MPIAIJ, 3097 0, 3098 MatSetColoring_MPIAIJ, 3099 #if defined(PETSC_HAVE_ADIC) 3100 MatSetValuesAdic_MPIAIJ, 3101 #else 3102 0, 3103 #endif 3104 MatSetValuesAdifor_MPIAIJ, 3105 /*75*/ MatFDColoringApply_AIJ, 3106 0, 3107 0, 3108 0, 3109 MatFindZeroDiagonals_MPIAIJ, 3110 /*80*/ 0, 3111 0, 3112 0, 3113 /*83*/ MatLoad_MPIAIJ, 3114 0, 3115 0, 3116 0, 3117 0, 3118 0, 3119 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3120 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3121 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3122 MatPtAP_Basic, 3123 MatPtAPSymbolic_MPIAIJ, 3124 /*94*/ MatPtAPNumeric_MPIAIJ, 3125 0, 3126 0, 3127 0, 3128 0, 3129 /*99*/ 0, 3130 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3131 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3132 MatConjugate_MPIAIJ, 3133 0, 3134 /*104*/MatSetValuesRow_MPIAIJ, 3135 MatRealPart_MPIAIJ, 3136 MatImaginaryPart_MPIAIJ, 3137 0, 3138 0, 3139 /*109*/0, 3140 MatGetRedundantMatrix_MPIAIJ, 3141 MatGetRowMin_MPIAIJ, 3142 0, 3143 0, 3144 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3145 0, 3146 0, 3147 0, 3148 0, 3149 /*119*/0, 3150 0, 3151 0, 3152 0, 3153 MatGetMultiProcBlock_MPIAIJ, 3154 /*124*/MatFindNonzeroRows_MPIAIJ, 3155 
MatGetColumnNorms_MPIAIJ, 3156 MatInvertBlockDiagonal_MPIAIJ, 3157 0, 3158 MatGetSubMatricesParallel_MPIAIJ, 3159 /*129*/0, 3160 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3161 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3162 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3163 0, 3164 /*134*/0, 3165 0, 3166 0, 3167 0, 3168 0 3169 }; 3170 3171 /* ----------------------------------------------------------------------------------------*/ 3172 3173 EXTERN_C_BEGIN 3174 #undef __FUNCT__ 3175 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3176 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3177 { 3178 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3179 PetscErrorCode ierr; 3180 3181 PetscFunctionBegin; 3182 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3183 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3184 PetscFunctionReturn(0); 3185 } 3186 EXTERN_C_END 3187 3188 EXTERN_C_BEGIN 3189 #undef __FUNCT__ 3190 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3191 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3192 { 3193 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3194 PetscErrorCode ierr; 3195 3196 PetscFunctionBegin; 3197 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3198 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3199 PetscFunctionReturn(0); 3200 } 3201 EXTERN_C_END 3202 3203 EXTERN_C_BEGIN 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3206 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3207 { 3208 Mat_MPIAIJ *b; 3209 PetscErrorCode ierr; 3210 PetscInt i; 3211 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3212 3213 PetscFunctionBegin; 3214 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3215 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3216 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3217 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3218 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3219 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3220 3221 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3222 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3223 if (d_nnz) { 3224 for (i=0; i<B->rmap->n; i++) { 3225 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3226 } 3227 } 3228 if (o_nnz) { 3229 for (i=0; i<B->rmap->n; i++) { 3230 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3231 } 3232 } 3233 b = (Mat_MPIAIJ*)B->data; 3234 3235 if (!B->preallocated) { 3236 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3237 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3238 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3239 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3240 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3241 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3242 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3243 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3244 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3245 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3246 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3247 } 3248 3249 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3250 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3251 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3252 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3253 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3254 B->preallocated = PETSC_TRUE; 3255 PetscFunctionReturn(0); 3256 } 3257 EXTERN_C_END 3258 3259 #undef __FUNCT__ 3260 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3261 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3262 { 3263 Mat mat; 3264 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3265 PetscErrorCode ierr; 3266 3267 PetscFunctionBegin; 3268 *newmat = 0; 3269 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3270 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3271 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3272 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3273 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3274 a = (Mat_MPIAIJ*)mat->data; 3275 3276 mat->factortype = matin->factortype; 3277 mat->rmap->bs = matin->rmap->bs; 3278 mat->cmap->bs = matin->cmap->bs; 3279 mat->assembled = PETSC_TRUE; 3280 mat->insertmode = NOT_SET_VALUES; 3281 mat->preallocated = PETSC_TRUE; 3282 3283 a->size = oldmat->size; 3284 a->rank = oldmat->rank; 3285 a->donotstash = oldmat->donotstash; 3286 a->roworiented = oldmat->roworiented; 3287 a->rowindices = 0; 3288 a->rowvalues = 0; 3289 a->getrowactive = PETSC_FALSE; 3290 3291 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3292 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3293 3294 if (oldmat->colmap) { 3295 #if defined (PETSC_USE_CTABLE) 3296 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3297 #else 3298 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3299 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3300 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3301 #endif 3302 } else a->colmap = 0; 3303 if (oldmat->garray) { 3304 PetscInt len; 3305 len = oldmat->B->cmap->n; 3306 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3307 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3308 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3309 } else a->garray = 0; 3310 3311 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3312 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3313 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3314 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3315 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3316 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3317 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3318 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3319 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3320 *newmat = mat; 3321 PetscFunctionReturn(0); 3322 } 3323 3324 3325 3326 #undef __FUNCT__ 3327 #define __FUNCT__ "MatLoad_MPIAIJ" 3328 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3329 { 3330 PetscScalar *vals,*svals; 3331 MPI_Comm comm = ((PetscObject)viewer)->comm; 3332 PetscErrorCode ierr; 3333 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3334 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3335 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3336 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3337 PetscInt cend,cstart,n,*rowners,sizesset=1; 3338 int fd; 3339 3340 PetscFunctionBegin; 3341 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3342 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3343 if (!rank) { 3344 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3345 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3346 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3347 } 3348 3349 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3350 3351 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3352 M = header[1]; N = header[2]; 3353 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3354 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3355 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3356 3357 /* If global sizes are set, check if they are consistent with that given in the file */ 3358 if (sizesset) { 3359 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3360 } 3361 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3362 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3363 3364 /* determine ownership of all rows */ 3365 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3366 else m = newMat->rmap->n; /* Set by user */ 3367 3368 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3369 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3370 3371 /* First process needs enough room for process with most rows */ 3372 if (!rank) { 3373 mmax = rowners[1]; 3374 for(i=2; i<=size; i++) { 3375 mmax = PetscMax(mmax, rowners[i]); 3376 } 3377 } else mmax = m; 3378 3379 rowners[0] = 0; 3380 for (i=2; i<=size; i++) { 3381 rowners[i] += rowners[i-1]; 3382 } 3383 rstart = rowners[rank]; 3384 rend = rowners[rank+1]; 3385 3386 /* distribute row lengths to all processors */ 3387 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3388 if (!rank) { 3389 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3390 ierr = 
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  containing the local rows, and then the final result obtained by concatenating
  the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
  PetscBool      allcolumns, colflag;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (colflag && ncol == mat->cmap->N) {
    allcolumns = PETSC_TRUE;
  } else {
    allcolumns = PETSC_FALSE;
  }
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}  [size = nrow+1 = 2+1]
        j = {0,0,2}  [size = nz = 3]
        v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}    [size = nrow+1 = 1+1]
        j = {0,1,2}  [size = nz = 3]
        v = {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (m x N) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
   you can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
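
       A minimal calling sketch (a sketch only; it assumes exactly two MPI ranks
     that own the rows exactly as in the example further below, and that the
     arrays are freed by the caller afterwards, which is safe since they are copied):

.vb
     Mat         A;
     PetscMPIInt rank;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
     PetscScalar v0[] = {1,2,3}, v1[] = {4,5,6};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     if (!rank) {
       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,v0,&A);CHKERRQ(ierr);
     } else {
       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,v1,&A);CHKERRQ(ierr);
     }
.ve
     The matrix is returned already assembled and ready for use.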

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}  [size = nrow+1 = 2+1]
        j = {0,0,2}  [size = nz = 3]
        v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}    [size = nrow+1 = 1+1]
        j = {0,1,2}  [size = nz = 3]
        v = {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc.  The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion.  The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
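
   As a minimal sketch of the recommended MatCreate()/MatSetFromOptions()/
   MatXXXXSetPreallocation() sequence mentioned above (assuming comm, the sizes
   m,n,M,N and the d_nnz/o_nnz arrays have already been computed by the
   application, e.g., as worked out in the example just given):

.vb
     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);
.ve

   Each preallocation call takes effect only when it matches the actual matrix
   type, so it is safe to call both.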

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return an MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  if (n == PETSC_DECIDE) {
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* Check sum(n) = N */
  ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);

  ierr    = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
/*@
   MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating the
   sequential matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  inmat - the input sequential matrix (one per process)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of the matrix on EACH processor MUST be the same.

@*/
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;
  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
   MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
      matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  seqmat - the input sequential matrix
.  m - number of local rows (or PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
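
   Example usage (a minimal sketch, assuming each process has assembled a SeqAIJ
   matrix seqmat of the same global dimensions holding its additive contribution):
.vb
   Mat C;
   ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
   /* after changing the numerical values of seqmat (same nonzero pattern): */
   ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve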
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1){
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX){
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
   MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows
      and putting them into a sequential matrix with mlocal rows and n columns, where mlocal is the
      row count obtained with MatGetLocalSize() and n is the global column count obtained with
      MatGetSize()

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer
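
   Example usage (a sketch; A is an assembled MPIAIJ matrix):
.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
   /* ... use A_loc; with MAT_REUSE_MATRIX only the numerical values are refreshed ... */
   ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
   ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve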
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
   MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer
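
   Example usage (a sketch; passing PETSC_NULL for row and col selects all local
   rows and all nonzero columns):
.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
   /* ... use A_loc ... */
   ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve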
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of rows and columns of B to extract
-  B_seq - the sequential matrix generated

   Level: developer
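
   Example usage (a sketch; keeping rowb and colb lets the extraction be repeated
   cheaply with MAT_REUSE_MATRIX after the values of B change):
.vb
   IS  rowb = PETSC_NULL,colb = PETSC_NULL;
   Mat B_seq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   /* ... later ... */
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(&rowb);CHKERRQ(ierr);
   ierr = ISDestroy(&colb);CHKERRQ(ierr);
   ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve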
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=PETSC_NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to
    the nonzero columns of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer
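
    A typical call pattern (a sketch of how the matrix-product routines use this
    helper; the saved startsj/bufa arrays make the MAT_REUSE_MATRIX path cheap):

      PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
      MatScalar *bufa = PETSC_NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      /* ... after the numerical values of B change ... */
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);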
*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value");
  }
  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in mpiaij format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer
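
   Example usage (a sketch; the returned objects are owned by the matrix and must
   not be destroyed by the caller):
.vb
   Vec        lvec;
   VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
   PetscTable colmap;
#else
   PetscInt   *colmap;
#endif
   ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve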
@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (       B      )   =   m (         C        )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner
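
   Example usage (a minimal sketch; comm, m,n and M,N are the usual communicator and
   local/global sizes, d_nz and o_nz the per-row preallocation counts):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve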
.seealso: MatCreateAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  b->size = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.
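
   Example usage (a minimal sketch for a process owning two rows of a diagonal
   matrix, so the off-diagonal part is empty; the arrays must outlive the matrix):
.vb
   PetscInt    i[]  = {0,1,2},j[]  = {0,1};  /* diagonal block, local column indices */
   PetscScalar a[]  = {4.0,4.0};
   PetscInt    oi[] = {0,0,0},oj[1];         /* oj,oa are never referenced: no off-diagonal entries */
   PetscScalar oa[1];
   Mat         A;
   ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve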
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                              PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the macros below */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          } else {
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END