#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL;
   the type also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
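/* Fast path: when the matrix is assembled and the row and column ownership ranges coincide,
   every diagonal entry lives in the local diagonal block aij->A, so the diagonal can be set
   there directly; otherwise fall back to the generic MatDiagonalSet_Default(). */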
111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 217 218 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 220 ierr = ISDestroy(&sis);CHKERRQ(ierr); 221 ierr = ISDestroy(&gis);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatDistribute_MPIAIJ" 227 /* 228 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 229 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 230 231 Only for square matrices 232 233 Used by a preconditioner, hence PETSC_EXTERN 234 */ 235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 236 { 237 PetscMPIInt rank,size; 238 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 239 PetscErrorCode ierr; 240 Mat mat; 241 Mat_SeqAIJ *gmata; 242 PetscMPIInt tag; 243 MPI_Status status; 244 PetscBool aij; 245 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 250 if (!rank) { 251 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 252 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 253 } 254 if (reuse == MAT_INITIAL_MATRIX) { 255 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 256 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 257 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 258 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 259 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 260 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 261 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 262 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 263 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 264 265 rowners[0] = 0; 266 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 267 rstart = rowners[rank]; 268 rend = rowners[rank+1]; 269 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 270 if (!rank) { 271 gmata = (Mat_SeqAIJ*) gmat->data; 272 /* send row lengths to all processors */ 273 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 274 for (i=1; i<size; i++) { 275 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276 } 277 /* determine number diagonal and off-diagonal counts */ 278 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 279 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 280 jj = 0; 281 for (i=0; i<m; i++) { 282 for (j=0; j<dlens[i]; j++) { 283 if (gmata->j[jj] < rstart) ld[i]++; 284 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 285 jj++; 286 } 287 } 288 /* send column indices to other processes */ 289 for (i=1; i<size; i++) { 290 nz = 
gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 291 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 292 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 } 294 295 /* send numerical values to other processes */ 296 for (i=1; i<size; i++) { 297 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 298 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 299 } 300 gmataa = gmata->a; 301 gmataj = gmata->j; 302 303 } else { 304 /* receive row lengths */ 305 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 306 /* receive column indices */ 307 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 309 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* determine number diagonal and off-diagonal counts */ 311 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 313 jj = 0; 314 for (i=0; i<m; i++) { 315 for (j=0; j<dlens[i]; j++) { 316 if (gmataj[jj] < rstart) ld[i]++; 317 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 318 jj++; 319 } 320 } 321 /* receive numerical values */ 322 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* set preallocation */ 326 for (i=0; i<m; i++) { 327 dlens[i] -= olens[i]; 328 } 329 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 330 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 dlens[i] += olens[i]; 334 } 335 cnt = 0; 336 for (i=0; i<m; i++) { 337 row = rstart + i; 338 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 339 cnt += dlens[i]; 340 } 341 if (rank) { 342 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 343 } 344 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 345 ierr = PetscFree(rowners);CHKERRQ(ierr); 346 347 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 348 349 *inmat = mat; 350 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 351 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 352 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 353 mat = *inmat; 354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 355 if (!rank) { 356 /* send numerical values to other processes */ 357 gmata = (Mat_SeqAIJ*) gmat->data; 358 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 359 gmataa = gmata->a; 360 for (i=1; i<size; i++) { 361 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 362 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 363 } 364 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 365 } else { 366 /* receive numerical values from process 0*/ 367 nz = Ad->nz + Ao->nz; 368 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 369 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 370 } 371 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 372 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 373 ad = Ad->a; 374 ao = Ao->a; 375 if (mat->rmap->n) { 376 i = 0; 377 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 378 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; 
gmataa += nz; 379 } 380 for (i=1; i<mat->rmap->n; i++) { 381 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 i--; 385 if (mat->rmap->n) { 386 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 387 } 388 if (rank) { 389 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 390 } 391 } 392 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 393 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 /* 398 Local utility routine that creates a mapping from the global column 399 number to the local number in the off-diagonal part of the local 400 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 401 a slightly higher hash table cost; without it it is not scalable (each processor 402 has an order N integer array but is fast to acess. 403 */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; \ 454 } \ 455 rp1[_i] = col; \ 456 ap1[_i] = value; \ 457 A->nonzerostate++;\ 458 a_noinsert: ; \ 459 ailen[row] = nrow1; \ 460 } 461 462 463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 464 { \ 465 if (col <= lastcol2) low2 = 0; \ 466 else high2 = nrow2; \ 467 lastcol2 = col; \ 468 while (high2-low2 > 5) { \ 469 t = (low2+high2)/2; \ 470 if (rp2[t] > col) high2 = t; \ 471 else low2 = t; \ 472 } \ 473 for (_i=low2; _i<high2; _i++) { \ 
474 if (rp2[_i] > col) break; \ 475 if (rp2[_i] == col) { \ 476 if (addv == ADD_VALUES) ap2[_i] += value; \ 477 else ap2[_i] = value; \ 478 goto b_noinsert; \ 479 } \ 480 } \ 481 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 484 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 485 N = nrow2++ - 1; b->nz++; high2++; \ 486 /* shift up all the later entries in this row */ \ 487 for (ii=N; ii>=_i; ii--) { \ 488 rp2[ii+1] = rp2[ii]; \ 489 ap2[ii+1] = ap2[ii]; \ 490 } \ 491 rp2[_i] = col; \ 492 ap2[_i] = value; \ 493 B->nonzerostate++; \ 494 b_noinsert: ; \ 495 bilen[row] = nrow2; \ 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if (garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSetValues_MPIAIJ" 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 564 nrow1 = ailen[row]; 565 low1 = 0; 566 
high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (roworiented) value = v[i*n+j]; 577 else value = v[i+j*m]; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } else col = in[j]; 615 nonew = b->nonew; 616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!aij->donotstash) { 622 mat->assembled = PETSC_FALSE; 623 if (roworiented) { 624 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } else { 626 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } 628 } 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatGetValues_MPIAIJ" 636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 642 643 PetscFunctionBegin; 644 for (i=0; i<m; i++) { 645 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 646 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 647 if (idxm[i] >= rstart && idxm[i] < rend) { 648 row = idxm[i] - rstart; 649 for (j=0; j<n; j++) { 650 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 651 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 652 if (idxn[j] >= cstart && idxn[j] < cend) 
{ 653 col = idxn[j] - cstart; 654 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } else { 656 if (!aij->colmap) { 657 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 658 } 659 #if defined(PETSC_USE_CTABLE) 660 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 661 col--; 662 #else 663 col = aij->colmap[idxn[j]] - 1; 664 #endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. 
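     (Disassembly, performed by MatDisAssemble_MPIAIJ(), converts the off-diagonal block B back to
     global column numbering so that entries at new off-process locations can be inserted before
     the matrix is reassembled.)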
*/ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *lrows; 787 PetscInt r, len; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 /* get locally owned rows */ 792 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 793 /* fix right hand side if needed */ 794 if (x && b) { 795 const PetscScalar *xx; 796 PetscScalar *bb; 797 798 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 799 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 800 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 801 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 802 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 803 } 804 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 805 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 806 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 807 PetscBool cong; 808 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 809 if (cong) A->congruentlayouts = 1; 810 else A->congruentlayouts = 0; 811 } 812 if ((diag != 0.0) && A->congruentlayouts) { 813 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 814 } else if (diag != 0.0) { 815 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 817 for (r = 0; r < len; ++r) { 818 const 
PetscInt row = lrows[r] + A->rmap->rstart; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 } 826 ierr = PetscFree(lrows);CHKERRQ(ierr); 827 828 /* only change matrix nonzero state if pattern was allowed to be changed */ 829 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 830 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 831 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 832 } 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,p = 0,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscSFNode *rrows; 846 PetscSF sf; 847 const PetscScalar *xx; 848 PetscScalar *bb,*mask; 849 Vec xmask,lmask; 850 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 851 const PetscInt *aj, *ii,*ridx; 852 PetscScalar *aa; 853 854 PetscFunctionBegin; 855 /* Create SF where leaves are input rows and roots are owned rows */ 856 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 857 for (r = 0; r < n; ++r) lrows[r] = -1; 858 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 859 for (r = 0; r < N; ++r) { 860 const PetscInt idx = rows[r]; 861 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 862 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 863 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 864 } 865 rrows[r].rank = p; 866 rrows[r].index = rows[r] - owners[p]; 867 } 868 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 869 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 870 /* Collect flags for rows to be zeroed */ 871 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 872 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 874 /* Compress and put in row numbers */ 875 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 876 /* zero diagonal part of matrix */ 877 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 878 /* handle off diagonal part of matrix */ 879 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 880 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 881 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 882 for (i=0; i<len; i++) bb[lrows[i]] = 1; 883 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 884 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 887 if (x) { 888 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 891 ierr = 
VecGetArray(b,&bb);CHKERRQ(ierr); 892 } 893 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 894 /* remove zeroed rows of off diagonal matrix */ 895 ii = aij->i; 896 for (i=0; i<len; i++) { 897 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 898 } 899 /* loop over all elements of off process part of matrix zeroing removed columns*/ 900 if (aij->compressedrow.use) { 901 m = aij->compressedrow.nrows; 902 ii = aij->compressedrow.i; 903 ridx = aij->compressedrow.rindex; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 909 for (j=0; j<n; j++) { 910 if (PetscAbsScalar(mask[*aj])) { 911 if (b) bb[*ridx] -= *aa*xx[*aj]; 912 *aa = 0.0; 913 } 914 aa++; 915 aj++; 916 } 917 ridx++; 918 } 919 } else { /* do not use compressed row format */ 920 m = l->B->rmap->n; 921 for (i=0; i<m; i++) { 922 n = ii[i+1] - ii[i]; 923 aj = aij->j + ii[i]; 924 aa = aij->a + ii[i]; 925 for (j=0; j<n; j++) { 926 if (PetscAbsScalar(mask[*aj])) { 927 if (b) bb[i] -= *aa*xx[*aj]; 928 *aa = 0.0; 929 } 930 aa++; 931 aj++; 932 } 933 } 934 } 935 if (x) { 936 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 937 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 938 } 939 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 940 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 941 ierr = PetscFree(lrows);CHKERRQ(ierr); 942 943 /* only change matrix nonzero state if pattern was allowed to be changed */ 944 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 945 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 946 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatMult_MPIAIJ" 953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 954 { 955 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 956 PetscErrorCode ierr; 957 PetscInt nt; 958 959 PetscFunctionBegin; 960 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 961 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 962 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 964 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 972 { 973 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatMultAdd_MPIAIJ" 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 991 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 998 PetscErrorCode 
MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 999 { 1000 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 PetscBool merged; 1003 1004 PetscFunctionBegin; 1005 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1006 /* do nondiagonal part */ 1007 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1008 if (!merged) { 1009 /* send it on its way */ 1010 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1011 /* do local part */ 1012 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1013 /* receive remote parts: note this assumes the values are not actually */ 1014 /* added in yy until the next line, */ 1015 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 } else { 1017 /* do local part */ 1018 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1019 /* send it on its way */ 1020 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1021 /* values actually were received in the Begin() but we need to call this nop */ 1022 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1029 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1030 { 1031 MPI_Comm comm; 1032 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1033 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1034 IS Me,Notme; 1035 PetscErrorCode ierr; 1036 PetscInt M,N,first,last,*notme,i; 1037 PetscMPIInt size; 1038 1039 PetscFunctionBegin; 1040 /* Easy test: symmetric diagonal block */ 1041 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1042 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1043 if (!*f) PetscFunctionReturn(0); 1044 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1045 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1046 if (size == 1) PetscFunctionReturn(0); 1047 1048 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
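     Each process extracts its owned rows restricted to the columns it does not own from Amat (Aoff)
     and the complementary block of Bmat (Boff, rows Notme by columns Me), then checks
     MatIsTranspose(Aoff,Boff) locally.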
*/ 1049 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1050 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1051 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1052 for (i=0; i<first; i++) notme[i] = i; 1053 for (i=last; i<M; i++) notme[i-last+first] = i; 1054 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1055 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1056 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1057 Aoff = Aoffs[0]; 1058 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1059 Boff = Boffs[0]; 1060 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1061 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1062 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1063 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1064 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1065 ierr = PetscFree(notme);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 PetscErrorCode ierr; 1075 1076 PetscFunctionBegin; 1077 /* do nondiagonal part */ 1078 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1079 /* send it on its way */ 1080 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1081 /* do local part */ 1082 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1083 /* receive remote parts */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 /* 1089 This only works correctly for square matrices where the subblock A->A is the 1090 diagonal block 1091 */ 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1095 { 1096 PetscErrorCode ierr; 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 1099 PetscFunctionBegin; 1100 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1101 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1102 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatScale_MPIAIJ" 1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1109 { 1110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 1113 PetscFunctionBegin; 1114 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1115 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatDestroy_MPIAIJ" 1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1122 { 1123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 #if defined(PETSC_USE_LOG) 1128 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1129 #endif 1130 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1131 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1132 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1133 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_CTABLE) 1135 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1136 #else 1137 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1138 #endif 1139 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1140 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1141 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1142 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1143 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1144 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1145 1146 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1154 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1155 #if defined(PETSC_HAVE_ELEMENTAL) 1156 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1157 #endif 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1163 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1164 { 1165 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1166 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1167 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1168 PetscErrorCode ierr; 1169 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1170 int fd; 1171 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1172 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1173 PetscScalar *column_values; 1174 PetscInt message_count,flowcontrolcount; 1175 FILE *file; 1176 1177 PetscFunctionBegin; 1178 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1179 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1180 nz = A->nz + B->nz; 1181 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1182 if (!rank) { 1183 header[0] = MAT_FILE_CLASSID; 1184 header[1] = mat->rmap->N; 1185 header[2] = mat->cmap->N; 1186 1187 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1188 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1189 /* get largest number of rows any processor has */ 1190 rlen = mat->rmap->n; 1191 range = mat->rmap->range; 1192 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1193 } else { 1194 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1195 rlen = mat->rmap->n; 1196 } 1197 1198 /* load up the local row counts */ 1199 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1200 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1201 1202 /* store the row lengths to the file */ 1203 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1204 if (!rank) { 1205 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1206 for (i=1; i<size; i++) { 1207 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1208 rlen = range[i+1] - range[i]; 1209 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1210 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1211 } 1212 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1213 } else { 1214 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1215 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1216 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1217 } 1218 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1219 1220 /* load up the local column indices */ 1221 nzmax = nz; /* th processor needs space a largest processor needs */ 1222 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1223 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1224 cnt = 0; 1225 for (i=0; i<mat->rmap->n; i++) { 1226 for (j=B->i[i]; j<B->i[i+1]; j++) { 1227 if ((col = garray[B->j[j]]) > cstart) break; 1228 column_indices[cnt++] = col; 1229 } 1230 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1231 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1232 } 1233 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1234 1235 /* store the column indices to the file */ 1236 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1237 if (!rank) { 1238 MPI_Status status; 1239 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1240 for (i=1; i<size; i++) { 1241 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1242 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1243 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1244 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1245 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1246 } 1247 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1248 } else { 1249 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1250 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1251 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1252 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1253 } 1254 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1255 1256 /* load up the local column values */ 1257 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1258 cnt = 0; 1259 for (i=0; i<mat->rmap->n; i++) { 1260 for (j=B->i[i]; j<B->i[i+1]; j++) { 1261 if (garray[B->j[j]] > cstart) break; 1262 column_values[cnt++] = B->a[j]; 1263 } 1264 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1265 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1266 } 1267 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1268 1269 /* store the column values to the file */ 1270 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1271 if (!rank) { 1272 MPI_Status status; 1273 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1274 for (i=1; i<size; i++) { 1275 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1276 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1277 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1278 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1279 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1280 } 1281 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1282 } else { 1283 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1284 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1285 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1286 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1287 } 1288 ierr = PetscFree(column_values);CHKERRQ(ierr); 1289 1290 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1291 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #include <petscdraw.h> 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1298 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1299 { 1300 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1301 PetscErrorCode ierr; 1302 PetscMPIInt rank = aij->rank,size = aij->size; 1303 PetscBool isdraw,iascii,isbinary; 1304 PetscViewer sviewer; 1305 PetscViewerFormat format; 1306 1307 PetscFunctionBegin; 1308 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1309 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1310 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1311 if (iascii) { 1312 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1313 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1314 MatInfo info; 1315 PetscBool inodes; 1316 1317 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1318 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1319 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1320 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1321 if (!inodes) { 1322 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1323 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1324 } else { 1325 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1326 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1327 } 1328 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1329 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1330 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1331 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1332 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1334 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1335 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1338 PetscInt inodecount,inodelimit,*inodes; 1339 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1340 if (inodes) { 1341 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1344 } 1345 PetscFunctionReturn(0); 1346 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1347 PetscFunctionReturn(0); 1348 } 1349 } else if (isbinary) { 1350 if (size == 1) { 1351 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1352 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1353 } else { 1354 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1355 } 1356 PetscFunctionReturn(0); 1357 } else if (isdraw) { 1358 PetscDraw draw; 1359 PetscBool isnull; 1360 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1361 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1362 if (isnull) PetscFunctionReturn(0); 1363 } 1364 1365 { 1366 /* assemble the entire matrix onto first processor. */ 1367 Mat A; 1368 Mat_SeqAIJ *Aloc; 1369 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1370 MatScalar *a; 1371 1372 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1373 if (!rank) { 1374 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1375 } else { 1376 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1377 } 1378 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1379 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1380 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1381 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1382 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1383 1384 /* copy over the A part */ 1385 Aloc = (Mat_SeqAIJ*)aij->A->data; 1386 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1387 row = mat->rmap->rstart; 1388 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1389 for (i=0; i<m; i++) { 1390 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1391 row++; 1392 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1393 } 1394 aj = Aloc->j; 1395 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1396 1397 /* copy over the B part */ 1398 Aloc = (Mat_SeqAIJ*)aij->B->data; 1399 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1400 row = mat->rmap->rstart; 1401 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1402 ct = cols; 1403 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1404 for (i=0; i<m; i++) { 1405 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1406 row++; 1407 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1408 } 1409 ierr = PetscFree(ct);CHKERRQ(ierr); 1410 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 /* 1413 
Everyone has to call to draw the matrix since the graphics waits are 1414 synchronized across all processors that share the PetscDraw object 1415 */ 1416 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1417 if (!rank) { 1418 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1419 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1420 } 1421 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1422 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1423 ierr = MatDestroy(&A);CHKERRQ(ierr); 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatView_MPIAIJ" 1430 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1431 { 1432 PetscErrorCode ierr; 1433 PetscBool iascii,isdraw,issocket,isbinary; 1434 1435 PetscFunctionBegin; 1436 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1437 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1438 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1439 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1440 if (iascii || isdraw || isbinary || issocket) { 1441 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1442 } 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatSOR_MPIAIJ" 1448 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1449 { 1450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1451 PetscErrorCode ierr; 1452 Vec bb1 = 0; 1453 PetscBool hasop; 1454 1455 PetscFunctionBegin; 1456 if (flag == SOR_APPLY_UPPER) { 1457 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1458 PetscFunctionReturn(0); 1459 } 1460 1461 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1462 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1463 } 1464 1465 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1466 if (flag & SOR_ZERO_INITIAL_GUESS) { 1467 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1468 its--; 1469 } 1470 1471 while (its--) { 1472 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1473 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1474 1475 /* update rhs: bb1 = bb - B*x */ 1476 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1477 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1478 1479 /* local sweep */ 1480 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1481 } 1482 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1483 if (flag & SOR_ZERO_INITIAL_GUESS) { 1484 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1485 its--; 1486 } 1487 while (its--) { 1488 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1490 1491 /* update rhs: bb1 = bb - B*x */ 1492 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1493 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1494 1495 /* local sweep */ 1496 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1497 } 1498 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1499 if (flag & SOR_ZERO_INITIAL_GUESS) { 1500 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1501 its--; 1502 } 1503 while (its--) { 1504 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 1507 /* update rhs: bb1 = bb - B*x */ 1508 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1509 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1510 1511 /* local sweep */ 1512 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1513 } 1514 } else if (flag & SOR_EISENSTAT) { 1515 Vec xx1; 1516 1517 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1518 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1519 1520 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1522 if (!mat->diag) { 1523 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1524 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1525 } 1526 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1527 if (hasop) { 1528 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1529 } else { 1530 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1531 } 1532 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1533 1534 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1535 1536 /* local sweep */ 1537 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1538 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1539 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1540 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1541 1542 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1543 1544 matin->errortype = mat->A->errortype; 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatPermute_MPIAIJ" 1550 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1551 { 1552 Mat aA,aB,Aperm; 1553 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1554 PetscScalar *aa,*ba; 1555 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1556 PetscSF rowsf,sf; 1557 IS parcolp = NULL; 1558 PetscBool done; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1563 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1564 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1565 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1566 1567 /* Invert row permutation to find out where my rows should go */ 1568 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1569 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1570 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1571 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1572 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1573 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1574 1575 /* 
Invert column permutation to find out where my columns should go */ 1576 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1577 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1578 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1579 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1580 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1581 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1582 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1583 1584 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1585 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1586 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1587 1588 /* Find out where my gcols should go */ 1589 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1590 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1591 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1592 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1593 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1594 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1595 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1596 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1597 1598 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1599 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1600 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1601 for (i=0; i<m; i++) { 1602 PetscInt row = rdest[i],rowner; 1603 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1604 for (j=ai[i]; j<ai[i+1]; j++) { 1605 PetscInt cowner,col = cdest[aj[j]]; 1606 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1607 if (rowner == cowner) dnnz[i]++; 1608 else onnz[i]++; 1609 } 1610 for (j=bi[i]; j<bi[i+1]; j++) { 1611 PetscInt cowner,col = gcdest[bj[j]]; 1612 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1613 if (rowner == cowner) dnnz[i]++; 1614 else onnz[i]++; 1615 } 1616 } 1617 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1618 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1619 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1620 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1621 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1622 1623 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1624 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1625 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1626 for (i=0; i<m; i++) { 1627 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1628 PetscInt j0,rowlen; 1629 rowlen = ai[i+1] - ai[i]; 1630 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1631 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1632 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1633 } 1634 rowlen = bi[i+1] - bi[i]; 1635 for (j0=j=0; j<rowlen; j0=j) { 1636 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1637 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1638 } 1639 } 1640 ierr = 
MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1643 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1644 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1645 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1646 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1647 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1648 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1649 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1650 *B = Aperm; 1651 PetscFunctionReturn(0); 1652 } 1653 1654 #undef __FUNCT__ 1655 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1656 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1657 { 1658 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1659 PetscErrorCode ierr; 1660 1661 PetscFunctionBegin; 1662 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1663 if (ghosts) *ghosts = aij->garray; 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1669 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1670 { 1671 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1672 Mat A = mat->A,B = mat->B; 1673 PetscErrorCode ierr; 1674 PetscReal isend[5],irecv[5]; 1675 1676 PetscFunctionBegin; 1677 info->block_size = 1.0; 1678 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1679 1680 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1681 isend[3] = info->memory; isend[4] = info->mallocs; 1682 1683 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1684 1685 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1686 isend[3] += info->memory; isend[4] += info->mallocs; 1687 if (flag == MAT_LOCAL) { 1688 info->nz_used = isend[0]; 1689 info->nz_allocated = isend[1]; 1690 info->nz_unneeded = isend[2]; 1691 info->memory = isend[3]; 1692 info->mallocs = isend[4]; 1693 } else if (flag == MAT_GLOBAL_MAX) { 1694 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1695 1696 info->nz_used = irecv[0]; 1697 info->nz_allocated = irecv[1]; 1698 info->nz_unneeded = irecv[2]; 1699 info->memory = irecv[3]; 1700 info->mallocs = irecv[4]; 1701 } else if (flag == MAT_GLOBAL_SUM) { 1702 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1703 1704 info->nz_used = irecv[0]; 1705 info->nz_allocated = irecv[1]; 1706 info->nz_unneeded = irecv[2]; 1707 info->memory = irecv[3]; 1708 info->mallocs = irecv[4]; 1709 } 1710 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1711 info->fill_ratio_needed = 0; 1712 info->factor_mallocs = 0; 1713 PetscFunctionReturn(0); 1714 } 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "MatSetOption_MPIAIJ" 1718 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1719 { 1720 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 switch (op) { 1725 case MAT_NEW_NONZERO_LOCATIONS: 1726 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1727 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1728 case MAT_KEEP_NONZERO_PATTERN: 1729 case MAT_NEW_NONZERO_LOCATION_ERR: 1730 case MAT_USE_INODES: 1731 case MAT_IGNORE_ZERO_ENTRIES: 1732 MatCheckPreallocated(A,1); 1733 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1734 ierr = 
MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1735 break; 1736 case MAT_ROW_ORIENTED: 1737 MatCheckPreallocated(A,1); 1738 a->roworiented = flg; 1739 1740 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1741 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1742 break; 1743 case MAT_NEW_DIAGONALS: 1744 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1745 break; 1746 case MAT_IGNORE_OFF_PROC_ENTRIES: 1747 a->donotstash = flg; 1748 break; 1749 case MAT_SPD: 1750 A->spd_set = PETSC_TRUE; 1751 A->spd = flg; 1752 if (flg) { 1753 A->symmetric = PETSC_TRUE; 1754 A->structurally_symmetric = PETSC_TRUE; 1755 A->symmetric_set = PETSC_TRUE; 1756 A->structurally_symmetric_set = PETSC_TRUE; 1757 } 1758 break; 1759 case MAT_SYMMETRIC: 1760 MatCheckPreallocated(A,1); 1761 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1762 break; 1763 case MAT_STRUCTURALLY_SYMMETRIC: 1764 MatCheckPreallocated(A,1); 1765 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1766 break; 1767 case MAT_HERMITIAN: 1768 MatCheckPreallocated(A,1); 1769 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1770 break; 1771 case MAT_SYMMETRY_ETERNAL: 1772 MatCheckPreallocated(A,1); 1773 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1774 break; 1775 default: 1776 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1777 } 1778 PetscFunctionReturn(0); 1779 } 1780 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatGetRow_MPIAIJ" 1783 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1784 { 1785 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1786 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1787 PetscErrorCode ierr; 1788 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1789 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1790 PetscInt *cmap,*idx_p; 1791 1792 PetscFunctionBegin; 1793 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1794 mat->getrowactive = PETSC_TRUE; 1795 1796 if (!mat->rowvalues && (idx || v)) { 1797 /* 1798 allocate enough space to hold information from the longest row. 
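       The work arrays rowvalues/rowindices are sized by the largest combined row length over the
       diagonal (A) and off-diagonal (B) blocks, so one allocation serves every later MatGetRow() call.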
1799 */ 1800 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1801 PetscInt max = 1,tmp; 1802 for (i=0; i<matin->rmap->n; i++) { 1803 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1804 if (max < tmp) max = tmp; 1805 } 1806 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1807 } 1808 1809 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1810 lrow = row - rstart; 1811 1812 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1813 if (!v) {pvA = 0; pvB = 0;} 1814 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1815 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1816 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1817 nztot = nzA + nzB; 1818 1819 cmap = mat->garray; 1820 if (v || idx) { 1821 if (nztot) { 1822 /* Sort by increasing column numbers, assuming A and B already sorted */ 1823 PetscInt imark = -1; 1824 if (v) { 1825 *v = v_p = mat->rowvalues; 1826 for (i=0; i<nzB; i++) { 1827 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1828 else break; 1829 } 1830 imark = i; 1831 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1832 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1833 } 1834 if (idx) { 1835 *idx = idx_p = mat->rowindices; 1836 if (imark > -1) { 1837 for (i=0; i<imark; i++) { 1838 idx_p[i] = cmap[cworkB[i]]; 1839 } 1840 } else { 1841 for (i=0; i<nzB; i++) { 1842 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1843 else break; 1844 } 1845 imark = i; 1846 } 1847 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1848 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1849 } 1850 } else { 1851 if (idx) *idx = 0; 1852 if (v) *v = 0; 1853 } 1854 } 1855 *nz = nztot; 1856 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1857 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1863 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1864 { 1865 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1866 1867 PetscFunctionBegin; 1868 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1869 aij->getrowactive = PETSC_FALSE; 1870 PetscFunctionReturn(0); 1871 } 1872 1873 #undef __FUNCT__ 1874 #define __FUNCT__ "MatNorm_MPIAIJ" 1875 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1876 { 1877 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1878 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1879 PetscErrorCode ierr; 1880 PetscInt i,j,cstart = mat->cmap->rstart; 1881 PetscReal sum = 0.0; 1882 MatScalar *v; 1883 1884 PetscFunctionBegin; 1885 if (aij->size == 1) { 1886 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1887 } else { 1888 if (type == NORM_FROBENIUS) { 1889 v = amat->a; 1890 for (i=0; i<amat->nz; i++) { 1891 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1892 } 1893 v = bmat->a; 1894 for (i=0; i<bmat->nz; i++) { 1895 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1896 } 1897 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1898 *norm = PetscSqrtReal(*norm); 1899 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1900 } else if (type == NORM_1) { /* max column norm */ 1901 PetscReal *tmp,*tmp2; 1902 PetscInt *jj,*garray = aij->garray; 1903 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1904 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1905 *norm = 0.0; 1906 v = amat->a; jj = amat->j; 1907 for (j=0; j<amat->nz; j++) { 1908 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1909 } 1910 v = bmat->a; jj = bmat->j; 1911 for (j=0; j<bmat->nz; j++) { 1912 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1913 } 1914 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1915 for (j=0; j<mat->cmap->N; j++) { 1916 if (tmp2[j] > *norm) *norm = tmp2[j]; 1917 } 1918 ierr = PetscFree(tmp);CHKERRQ(ierr); 1919 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1920 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1921 } else if (type == NORM_INFINITY) { /* max row norm */ 1922 PetscReal ntemp = 0.0; 1923 for (j=0; j<aij->A->rmap->n; j++) { 1924 v = amat->a + amat->i[j]; 1925 sum = 0.0; 1926 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1927 sum += PetscAbsScalar(*v); v++; 1928 } 1929 v = bmat->a + bmat->i[j]; 1930 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1931 sum += PetscAbsScalar(*v); v++; 1932 } 1933 if (sum > ntemp) ntemp = sum; 1934 } 1935 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1936 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1937 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatTranspose_MPIAIJ" 1944 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1945 { 1946 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1947 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1948 PetscErrorCode ierr; 1949 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1950 PetscInt cstart = A->cmap->rstart,ncol; 1951 Mat B; 1952 MatScalar *array; 1953 1954 PetscFunctionBegin; 1955 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1956 1957 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1958 ai = Aloc->i; aj = Aloc->j; 1959 bi = Bloc->i; bj = Bloc->j; 1960 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1961 PetscInt *d_nnz,*g_nnz,*o_nnz; 1962 PetscSFNode *oloc; 1963 PETSC_UNUSED PetscSF sf; 1964 1965 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1966 /* compute d_nnz for preallocation */ 1967 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1968 for (i=0; i<ai[ma]; i++) { 1969 d_nnz[aj[i]]++; 1970 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1971 } 1972 /* compute local off-diagonal contributions */ 1973 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1974 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1975 /* map those to global */ 1976 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1977 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1978 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1979 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1980 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1981 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1982 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1983 1984 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1985 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1986 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1987 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1988 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1989 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1990 } else { 1991 B = *matout; 1992 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1993 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1994 } 1995 1996 /* copy over the A part */ 1997 array = Aloc->a; 1998 row = A->rmap->rstart; 1999 for (i=0; i<ma; i++) { 2000 ncol = ai[i+1]-ai[i]; 2001 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2002 row++; 2003 array += ncol; aj += ncol; 2004 } 2005 aj = Aloc->j; 2006 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2007 2008 /* copy over the B part */ 2009 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2010 array = Bloc->a; 2011 row = A->rmap->rstart; 2012 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2013 cols_tmp = cols; 2014 for (i=0; i<mb; i++) { 2015 ncol = bi[i+1]-bi[i]; 2016 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2017 row++; 2018 array += ncol; cols_tmp += ncol; 2019 } 2020 ierr = PetscFree(cols);CHKERRQ(ierr); 2021 2022 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2023 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2024 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2025 *matout = B; 2026 } else { 2027 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2028 } 2029 PetscFunctionReturn(0); 2030 } 2031 2032 #undef __FUNCT__ 2033 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2034 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2035 { 2036 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2037 Mat a = aij->A,b = aij->B; 2038 PetscErrorCode ierr; 2039 PetscInt s1,s2,s3; 2040 2041 PetscFunctionBegin; 2042 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2043 if (rr) { 2044 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2045 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2046 /* Overlap communication with computation. 
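       The scatter of rr into lvec is started here and completed only after the diagonal block has been
       scaled, so the right scaling of the off-diagonal block does not force an extra synchronization.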
*/ 2047 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2048 } 2049 if (ll) { 2050 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2051 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2052 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2053 } 2054 /* scale the diagonal block */ 2055 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2056 2057 if (rr) { 2058 /* Do a scatter end and then right scale the off-diagonal block */ 2059 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2060 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2061 } 2062 PetscFunctionReturn(0); 2063 } 2064 2065 #undef __FUNCT__ 2066 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2067 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2068 { 2069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2070 PetscErrorCode ierr; 2071 2072 PetscFunctionBegin; 2073 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatEqual_MPIAIJ" 2079 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2080 { 2081 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2082 Mat a,b,c,d; 2083 PetscBool flg; 2084 PetscErrorCode ierr; 2085 2086 PetscFunctionBegin; 2087 a = matA->A; b = matA->B; 2088 c = matB->A; d = matB->B; 2089 2090 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2091 if (flg) { 2092 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2093 } 2094 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2095 PetscFunctionReturn(0); 2096 } 2097 2098 #undef __FUNCT__ 2099 #define __FUNCT__ "MatCopy_MPIAIJ" 2100 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2101 { 2102 PetscErrorCode ierr; 2103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2104 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2105 2106 PetscFunctionBegin; 2107 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2108 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2109 /* because of the column compression in the off-processor part of the matrix a->B, 2110 the number of columns in a->B and b->B may be different, hence we cannot call 2111 the MatCopy() directly on the two parts. If need be, we can provide a more 2112 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2113 then copying the submatrices */ 2114 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2115 } else { 2116 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2117 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatSetUp_MPIAIJ" 2124 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2125 { 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2130 PetscFunctionReturn(0); 2131 } 2132 2133 /* 2134 Computes the number of nonzeros per row needed for preallocation when X and Y 2135 have different nonzero structure. 
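   The count for each row is the size of the union of the column indices of X and Y: the two sorted
   index lists are merged in global numbering (through xltog and yltog), and a column present in both
   matrices is counted only once.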
2136 */ 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2139 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2140 { 2141 PetscInt i,j,k,nzx,nzy; 2142 2143 PetscFunctionBegin; 2144 /* Set the number of nonzeros in the new matrix */ 2145 for (i=0; i<m; i++) { 2146 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2147 nzx = xi[i+1] - xi[i]; 2148 nzy = yi[i+1] - yi[i]; 2149 nnz[i] = 0; 2150 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2151 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2152 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2153 nnz[i]++; 2154 } 2155 for (; k<nzy; k++) nnz[i]++; 2156 } 2157 PetscFunctionReturn(0); 2158 } 2159 2160 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2163 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2164 { 2165 PetscErrorCode ierr; 2166 PetscInt m = Y->rmap->N; 2167 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2168 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2169 2170 PetscFunctionBegin; 2171 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatAXPY_MPIAIJ" 2177 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2178 { 2179 PetscErrorCode ierr; 2180 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2181 PetscBLASInt bnz,one=1; 2182 Mat_SeqAIJ *x,*y; 2183 2184 PetscFunctionBegin; 2185 if (str == SAME_NONZERO_PATTERN) { 2186 PetscScalar alpha = a; 2187 x = (Mat_SeqAIJ*)xx->A->data; 2188 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2189 y = (Mat_SeqAIJ*)yy->A->data; 2190 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2191 x = (Mat_SeqAIJ*)xx->B->data; 2192 y = (Mat_SeqAIJ*)yy->B->data; 2193 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2194 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2195 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2196 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2197 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2198 } else { 2199 Mat B; 2200 PetscInt *nnz_d,*nnz_o; 2201 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2202 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2203 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2204 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2205 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2206 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2207 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2208 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2209 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2210 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2211 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2212 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2213 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2214 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2215 } 2216 
PetscFunctionReturn(0); 2217 } 2218 2219 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "MatConjugate_MPIAIJ" 2223 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2224 { 2225 #if defined(PETSC_USE_COMPLEX) 2226 PetscErrorCode ierr; 2227 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2228 2229 PetscFunctionBegin; 2230 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2231 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2232 #else 2233 PetscFunctionBegin; 2234 #endif 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatRealPart_MPIAIJ" 2240 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2241 { 2242 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2243 PetscErrorCode ierr; 2244 2245 PetscFunctionBegin; 2246 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2247 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2253 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2254 { 2255 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2256 PetscErrorCode ierr; 2257 2258 PetscFunctionBegin; 2259 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2260 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2261 PetscFunctionReturn(0); 2262 } 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2266 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2267 { 2268 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2269 PetscErrorCode ierr; 2270 PetscInt i,*idxb = 0; 2271 PetscScalar *va,*vb; 2272 Vec vtmp; 2273 2274 PetscFunctionBegin; 2275 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2276 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2277 if (idx) { 2278 for (i=0; i<A->rmap->n; i++) { 2279 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2280 } 2281 } 2282 2283 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2284 if (idx) { 2285 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2286 } 2287 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2288 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2289 2290 for (i=0; i<A->rmap->n; i++) { 2291 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2292 va[i] = vb[i]; 2293 if (idx) idx[i] = a->garray[idxb[i]]; 2294 } 2295 } 2296 2297 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2298 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2299 ierr = PetscFree(idxb);CHKERRQ(ierr); 2300 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2306 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2307 { 2308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2309 PetscErrorCode ierr; 2310 PetscInt i,*idxb = 0; 2311 PetscScalar *va,*vb; 2312 Vec vtmp; 2313 2314 PetscFunctionBegin; 2315 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2316 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2317 if (idx) { 2318 for (i=0; i<A->cmap->n; i++) { 2319 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2320 } 2321 } 2322 2323 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2324 if (idx) { 2325 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2326 } 2327 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2328 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2329 2330 for (i=0; i<A->rmap->n; i++) { 2331 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2332 va[i] = vb[i]; 2333 if (idx) idx[i] = a->garray[idxb[i]]; 2334 } 2335 } 2336 2337 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2338 ierr = 
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2339 ierr = PetscFree(idxb);CHKERRQ(ierr); 2340 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2346 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2347 { 2348 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2349 PetscInt n = A->rmap->n; 2350 PetscInt cstart = A->cmap->rstart; 2351 PetscInt *cmap = mat->garray; 2352 PetscInt *diagIdx, *offdiagIdx; 2353 Vec diagV, offdiagV; 2354 PetscScalar *a, *diagA, *offdiagA; 2355 PetscInt r; 2356 PetscErrorCode ierr; 2357 2358 PetscFunctionBegin; 2359 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2360 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2361 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2362 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2363 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2364 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2365 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2366 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2367 for (r = 0; r < n; ++r) { 2368 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2369 a[r] = diagA[r]; 2370 idx[r] = cstart + diagIdx[r]; 2371 } else { 2372 a[r] = offdiagA[r]; 2373 idx[r] = cmap[offdiagIdx[r]]; 2374 } 2375 } 2376 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2377 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2378 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2379 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2380 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2381 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2382 PetscFunctionReturn(0); 2383 } 2384 2385 #undef __FUNCT__ 2386 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2387 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2388 { 2389 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2390 PetscInt n = A->rmap->n; 2391 PetscInt cstart = A->cmap->rstart; 2392 PetscInt *cmap = mat->garray; 2393 PetscInt *diagIdx, *offdiagIdx; 2394 Vec diagV, offdiagV; 2395 PetscScalar *a, *diagA, *offdiagA; 2396 PetscInt r; 2397 PetscErrorCode ierr; 2398 2399 PetscFunctionBegin; 2400 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2401 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2402 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2403 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2404 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2405 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2406 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2407 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2408 for (r = 0; r < n; ++r) { 2409 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2410 a[r] = diagA[r]; 2411 idx[r] = cstart + diagIdx[r]; 2412 } else { 2413 a[r] = offdiagA[r]; 2414 idx[r] = cmap[offdiagIdx[r]]; 2415 } 2416 } 2417 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2418 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2419 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2420 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2421 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2422 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2428 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2429 { 2430 PetscErrorCode ierr; 
2431 Mat *dummy; 2432 2433 PetscFunctionBegin; 2434 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2435 *newmat = *dummy; 2436 ierr = PetscFree(dummy);CHKERRQ(ierr); 2437 PetscFunctionReturn(0); 2438 } 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2442 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2443 { 2444 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2445 PetscErrorCode ierr; 2446 2447 PetscFunctionBegin; 2448 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2449 A->errortype = a->A->errortype; 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2455 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2456 { 2457 PetscErrorCode ierr; 2458 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2459 2460 PetscFunctionBegin; 2461 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2462 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2463 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2464 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2470 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2471 { 2472 PetscFunctionBegin; 2473 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2474 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2475 PetscFunctionReturn(0); 2476 } 2477 2478 #undef __FUNCT__ 2479 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2480 /*@ 2481 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2482 2483 Collective on Mat 2484 2485 Input Parameters: 2486 + A - the matrix 2487 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2488 2489 Level: advanced 2490 2491 @*/ 2492 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2493 { 2494 PetscErrorCode ierr; 2495 2496 PetscFunctionBegin; 2497 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2498 PetscFunctionReturn(0); 2499 } 2500 2501 #undef __FUNCT__ 2502 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2503 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2504 { 2505 PetscErrorCode ierr; 2506 PetscBool sc = PETSC_FALSE,flg; 2507 2508 PetscFunctionBegin; 2509 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2510 ierr = PetscObjectOptionsBegin((PetscObject)A); 2511 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2512 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2513 if (flg) { 2514 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2515 } 2516 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 #undef __FUNCT__ 2521 #define __FUNCT__ "MatShift_MPIAIJ" 2522 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2523 { 2524 PetscErrorCode ierr; 2525 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2526 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2527 2528 PetscFunctionBegin; 2529 if (!Y->preallocated) { 2530 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2531 } else if (!aij->nz) { 2532 
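    /* the diagonal block currently holds no entries; give it one slot per row so the shift can be
       inserted, while preserving the nonew flag that MatSeqAIJSetPreallocation() resets */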
PetscInt nonew = aij->nonew; 2533 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2534 aij->nonew = nonew; 2535 } 2536 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2542 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2543 { 2544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2545 PetscErrorCode ierr; 2546 2547 PetscFunctionBegin; 2548 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2549 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2550 if (d) { 2551 PetscInt rstart; 2552 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2553 *d += rstart; 2554 2555 } 2556 PetscFunctionReturn(0); 2557 } 2558 2559 2560 /* -------------------------------------------------------------------*/ 2561 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2562 MatGetRow_MPIAIJ, 2563 MatRestoreRow_MPIAIJ, 2564 MatMult_MPIAIJ, 2565 /* 4*/ MatMultAdd_MPIAIJ, 2566 MatMultTranspose_MPIAIJ, 2567 MatMultTransposeAdd_MPIAIJ, 2568 0, 2569 0, 2570 0, 2571 /*10*/ 0, 2572 0, 2573 0, 2574 MatSOR_MPIAIJ, 2575 MatTranspose_MPIAIJ, 2576 /*15*/ MatGetInfo_MPIAIJ, 2577 MatEqual_MPIAIJ, 2578 MatGetDiagonal_MPIAIJ, 2579 MatDiagonalScale_MPIAIJ, 2580 MatNorm_MPIAIJ, 2581 /*20*/ MatAssemblyBegin_MPIAIJ, 2582 MatAssemblyEnd_MPIAIJ, 2583 MatSetOption_MPIAIJ, 2584 MatZeroEntries_MPIAIJ, 2585 /*24*/ MatZeroRows_MPIAIJ, 2586 0, 2587 0, 2588 0, 2589 0, 2590 /*29*/ MatSetUp_MPIAIJ, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /*34*/ MatDuplicate_MPIAIJ, 2596 0, 2597 0, 2598 0, 2599 0, 2600 /*39*/ MatAXPY_MPIAIJ, 2601 MatGetSubMatrices_MPIAIJ, 2602 MatIncreaseOverlap_MPIAIJ, 2603 MatGetValues_MPIAIJ, 2604 MatCopy_MPIAIJ, 2605 /*44*/ MatGetRowMax_MPIAIJ, 2606 MatScale_MPIAIJ, 2607 MatShift_MPIAIJ, 2608 MatDiagonalSet_MPIAIJ, 2609 MatZeroRowsColumns_MPIAIJ, 2610 /*49*/ MatSetRandom_MPIAIJ, 2611 0, 2612 0, 2613 0, 2614 0, 2615 /*54*/ MatFDColoringCreate_MPIXAIJ, 2616 0, 2617 MatSetUnfactored_MPIAIJ, 2618 MatPermute_MPIAIJ, 2619 0, 2620 /*59*/ MatGetSubMatrix_MPIAIJ, 2621 MatDestroy_MPIAIJ, 2622 MatView_MPIAIJ, 2623 0, 2624 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2625 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2626 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2627 0, 2628 0, 2629 0, 2630 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2631 MatGetRowMinAbs_MPIAIJ, 2632 0, 2633 MatSetColoring_MPIAIJ, 2634 0, 2635 MatSetValuesAdifor_MPIAIJ, 2636 /*75*/ MatFDColoringApply_AIJ, 2637 MatSetFromOptions_MPIAIJ, 2638 0, 2639 0, 2640 MatFindZeroDiagonals_MPIAIJ, 2641 /*80*/ 0, 2642 0, 2643 0, 2644 /*83*/ MatLoad_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2651 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2652 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2653 MatPtAP_MPIAIJ_MPIAIJ, 2654 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2655 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*99*/ 0, 2661 0, 2662 0, 2663 MatConjugate_MPIAIJ, 2664 0, 2665 /*104*/MatSetValuesRow_MPIAIJ, 2666 MatRealPart_MPIAIJ, 2667 MatImaginaryPart_MPIAIJ, 2668 0, 2669 0, 2670 /*109*/0, 2671 0, 2672 MatGetRowMin_MPIAIJ, 2673 0, 2674 MatMissingDiagonal_MPIAIJ, 2675 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2676 0, 2677 MatGetGhosts_MPIAIJ, 2678 0, 2679 0, 2680 /*119*/0, 2681 0, 2682 0, 2683 0, 2684 MatGetMultiProcBlock_MPIAIJ, 2685 /*124*/MatFindNonzeroRows_MPIAIJ, 2686 MatGetColumnNorms_MPIAIJ, 2687 MatInvertBlockDiagonal_MPIAIJ, 2688 0, 2689 
MatGetSubMatricesMPI_MPIAIJ, 2690 /*129*/0, 2691 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2692 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2693 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2694 0, 2695 /*134*/0, 2696 0, 2697 0, 2698 0, 2699 0, 2700 /*139*/0, 2701 0, 2702 0, 2703 MatFDColoringSetUp_MPIXAIJ, 2704 MatFindOffBlockDiagonalEntries_MPIAIJ, 2705 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2706 }; 2707 2708 /* ----------------------------------------------------------------------------------------*/ 2709 2710 #undef __FUNCT__ 2711 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2712 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2713 { 2714 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2715 PetscErrorCode ierr; 2716 2717 PetscFunctionBegin; 2718 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2719 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2720 PetscFunctionReturn(0); 2721 } 2722 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2725 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2726 { 2727 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2728 PetscErrorCode ierr; 2729 2730 PetscFunctionBegin; 2731 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2732 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2733 PetscFunctionReturn(0); 2734 } 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2738 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2739 { 2740 Mat_MPIAIJ *b; 2741 PetscErrorCode ierr; 2742 2743 PetscFunctionBegin; 2744 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2745 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2746 b = (Mat_MPIAIJ*)B->data; 2747 2748 if (!B->preallocated) { 2749 /* Explicitly create 2 MATSEQAIJ matrices. 
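       b->A stores the diagonal block (columns owned by this process) and b->B the off-diagonal block;
       b->B is created with the full global column width here and its columns are compressed to the ones
       actually referenced (recorded in garray) during assembly.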
*/ 2750 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2751 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2752 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2753 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2754 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2755 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2756 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2757 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2758 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2759 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2760 } 2761 2762 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2763 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2764 B->preallocated = PETSC_TRUE; 2765 PetscFunctionReturn(0); 2766 } 2767 2768 #undef __FUNCT__ 2769 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2770 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2771 { 2772 Mat mat; 2773 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2774 PetscErrorCode ierr; 2775 2776 PetscFunctionBegin; 2777 *newmat = 0; 2778 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2779 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2780 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2781 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2782 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2783 a = (Mat_MPIAIJ*)mat->data; 2784 2785 mat->factortype = matin->factortype; 2786 mat->assembled = PETSC_TRUE; 2787 mat->insertmode = NOT_SET_VALUES; 2788 mat->preallocated = PETSC_TRUE; 2789 2790 a->size = oldmat->size; 2791 a->rank = oldmat->rank; 2792 a->donotstash = oldmat->donotstash; 2793 a->roworiented = oldmat->roworiented; 2794 a->rowindices = 0; 2795 a->rowvalues = 0; 2796 a->getrowactive = PETSC_FALSE; 2797 2798 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2799 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2800 2801 if (oldmat->colmap) { 2802 #if defined(PETSC_USE_CTABLE) 2803 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2804 #else 2805 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2806 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2807 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2808 #endif 2809 } else a->colmap = 0; 2810 if (oldmat->garray) { 2811 PetscInt len; 2812 len = oldmat->B->cmap->n; 2813 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2814 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2815 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2816 } else a->garray = 0; 2817 2818 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2819 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2820 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2821 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2822 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2823 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2824 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2825 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2826 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2827 *newmat = mat; 2828 PetscFunctionReturn(0); 2829 } 2830 2831 2832 2833 #undef __FUNCT__ 2834 #define __FUNCT__ "MatLoad_MPIAIJ" 2835 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2836 { 2837 PetscScalar *vals,*svals; 2838 MPI_Comm comm; 2839 PetscErrorCode ierr; 2840 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2841 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2842 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2843 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2844 PetscInt cend,cstart,n,*rowners; 2845 int fd; 2846 PetscInt bs = newMat->rmap->bs; 2847 2848 PetscFunctionBegin; 2849 /* force binary viewer to load .info file if it has not yet done so */ 2850 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2851 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2852 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2853 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2854 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2855 if (!rank) { 2856 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2857 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2858 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MPIAIJ"); 2859 } 2860 2861 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 2862 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2863 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2864 if (bs < 0) bs = 1; 2865 2866 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2867 M = header[1]; N = header[2]; 2868 2869 /* If global sizes are set, check if they are consistent with that given in the file */ 2870 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2871 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2872 2873 /* determine ownership of all (block) rows */ 2874 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2875 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2876 else m = newMat->rmap->n; /* Set by user */ 2877 2878 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2879 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2880 2881 /* First process needs enough room for process with most rows */ 2882 if (!rank) { 2883 mmax = rowners[1]; 2884 for (i=2; i<=size; i++) { 2885 mmax = PetscMax(mmax, rowners[i]); 2886 } 2887 } else mmax = -1; /* unused, but compilers complain */ 2888 2889 rowners[0] = 0; 2890 for (i=2; i<=size; i++) { 2891 rowners[i] += rowners[i-1]; 2892 } 2893 rstart = rowners[rank]; 2894 rend = rowners[rank+1]; 2895 2896 /* distribute row lengths to all processors */ 2897 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2898 if (!rank) { 2899 ierr = 
PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2900 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2901 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2902 for (j=0; j<m; j++) { 2903 procsnz[0] += ourlens[j]; 2904 } 2905 for (i=1; i<size; i++) { 2906 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2907 /* calculate the number of nonzeros on each processor */ 2908 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2909 procsnz[i] += rowlengths[j]; 2910 } 2911 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2912 } 2913 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2914 } else { 2915 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2916 } 2917 2918 if (!rank) { 2919 /* determine max buffer needed and allocate it */ 2920 maxnz = 0; 2921 for (i=0; i<size; i++) { 2922 maxnz = PetscMax(maxnz,procsnz[i]); 2923 } 2924 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2925 2926 /* read in my part of the matrix column indices */ 2927 nz = procsnz[0]; 2928 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2929 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2930 2931 /* read in every one elses and ship off */ 2932 for (i=1; i<size; i++) { 2933 nz = procsnz[i]; 2934 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2935 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2936 } 2937 ierr = PetscFree(cols);CHKERRQ(ierr); 2938 } else { 2939 /* determine buffer space needed for message */ 2940 nz = 0; 2941 for (i=0; i<m; i++) { 2942 nz += ourlens[i]; 2943 } 2944 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2945 2946 /* receive message of column indices*/ 2947 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2948 } 2949 2950 /* determine column ownership if matrix is not square */ 2951 if (N != M) { 2952 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2953 else n = newMat->cmap->n; 2954 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2955 cstart = cend - n; 2956 } else { 2957 cstart = rstart; 2958 cend = rend; 2959 n = cend - cstart; 2960 } 2961 2962 /* loop over local rows, determining number of off diagonal entries */ 2963 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2964 jj = 0; 2965 for (i=0; i<m; i++) { 2966 for (j=0; j<ourlens[i]; j++) { 2967 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2968 jj++; 2969 } 2970 } 2971 2972 for (i=0; i<m; i++) { 2973 ourlens[i] -= offlens[i]; 2974 } 2975 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2976 2977 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2978 2979 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2980 2981 for (i=0; i<m; i++) { 2982 ourlens[i] += offlens[i]; 2983 } 2984 2985 if (!rank) { 2986 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2987 2988 /* read in my part of the matrix numerical values */ 2989 nz = procsnz[0]; 2990 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2991 2992 /* insert into matrix */ 2993 jj = rstart; 2994 smycols = mycols; 2995 svals = vals; 2996 for (i=0; i<m; i++) { 2997 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2998 smycols += ourlens[i]; 2999 svals += ourlens[i]; 3000 jj++; 3001 } 3002 3003 /* read in other processors and ship out */ 3004 for (i=1; i<size; i++) { 3005 nz = procsnz[i]; 3006 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3007 ierr = 
MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3008 } 3009 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3010 } else { 3011 /* receive numeric values */ 3012 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3013 3014 /* receive message of values*/ 3015 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3016 3017 /* insert into matrix */ 3018 jj = rstart; 3019 smycols = mycols; 3020 svals = vals; 3021 for (i=0; i<m; i++) { 3022 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3023 smycols += ourlens[i]; 3024 svals += ourlens[i]; 3025 jj++; 3026 } 3027 } 3028 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3029 ierr = PetscFree(vals);CHKERRQ(ierr); 3030 ierr = PetscFree(mycols);CHKERRQ(ierr); 3031 ierr = PetscFree(rowners);CHKERRQ(ierr); 3032 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3033 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3034 PetscFunctionReturn(0); 3035 } 3036 3037 #undef __FUNCT__ 3038 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3039 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */ 3040 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3041 { 3042 PetscErrorCode ierr; 3043 IS iscol_local; 3044 PetscInt csize; 3045 3046 PetscFunctionBegin; 3047 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3048 if (call == MAT_REUSE_MATRIX) { 3049 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3050 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3051 } else { 3052 /* check if we are grabbing all columns*/ 3053 PetscBool isstride; 3054 PetscMPIInt lisstride = 0,gisstride; 3055 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3056 if (isstride) { 3057 PetscInt start,len,mstart,mlen; 3058 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3059 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3060 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3061 if (mstart == start && mlen-mstart == len) lisstride = 1; 3062 } 3063 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3064 if (gisstride) { 3065 PetscInt N; 3066 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3067 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3068 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3069 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3070 } else { 3071 PetscInt cbs; 3072 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3073 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3074 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3075 } 3076 } 3077 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3078 if (call == MAT_INITIAL_MATRIX) { 3079 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3080 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3081 } 3082 PetscFunctionReturn(0); 3083 } 3084 3085 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3086 #undef __FUNCT__ 3087 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3088 /* 3089 Not great since it makes two copies of 
the submatrix, first an SeqAIJ 3090 in local and then by concatenating the local matrices the end result. 3091 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3092 3093 Note: This requires a sequential iscol with all indices. 3094 */ 3095 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3096 { 3097 PetscErrorCode ierr; 3098 PetscMPIInt rank,size; 3099 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3100 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3101 PetscBool allcolumns, colflag; 3102 Mat M,Mreuse; 3103 MatScalar *vwork,*aa; 3104 MPI_Comm comm; 3105 Mat_SeqAIJ *aij; 3106 3107 PetscFunctionBegin; 3108 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3111 3112 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3113 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3114 if (colflag && ncol == mat->cmap->N) { 3115 allcolumns = PETSC_TRUE; 3116 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3117 } else { 3118 allcolumns = PETSC_FALSE; 3119 } 3120 if (call == MAT_REUSE_MATRIX) { 3121 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3122 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3123 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3124 } else { 3125 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3126 } 3127 3128 /* 3129 m - number of local rows 3130 n - number of columns (same on all processors) 3131 rstart - first row in new global matrix generated 3132 */ 3133 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3134 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3135 if (call == MAT_INITIAL_MATRIX) { 3136 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3137 ii = aij->i; 3138 jj = aij->j; 3139 3140 /* 3141 Determine the number of non-zeros in the diagonal and off-diagonal 3142 portions of the matrix in order to do correct preallocation 3143 */ 3144 3145 /* first get start and end of "diagonal" columns */ 3146 if (csize == PETSC_DECIDE) { 3147 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3148 if (mglobal == n) { /* square matrix */ 3149 nlocal = m; 3150 } else { 3151 nlocal = n/size + ((n % size) > rank); 3152 } 3153 } else { 3154 nlocal = csize; 3155 } 3156 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3157 rstart = rend - nlocal; 3158 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3159 3160 /* next, compute all the lengths */ 3161 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3162 olens = dlens + m; 3163 for (i=0; i<m; i++) { 3164 jend = ii[i+1] - ii[i]; 3165 olen = 0; 3166 dlen = 0; 3167 for (j=0; j<jend; j++) { 3168 if (*jj < rstart || *jj >= rend) olen++; 3169 else dlen++; 3170 jj++; 3171 } 3172 olens[i] = olen; 3173 dlens[i] = dlen; 3174 } 3175 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3176 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3177 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3178 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3179 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 
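/* At this point dlens[i] and olens[i] hold, for each local row of the new submatrix, the number of entries whose global column index falls inside [rstart,rend) (the "diagonal" block) and outside it (the "off-diagonal" block); the MatMPIAIJSetPreallocation() call above therefore reserves exactly the space needed before the values are inserted further below. */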
3180 ierr = PetscFree(dlens);CHKERRQ(ierr); 3181 } else { 3182 PetscInt ml,nl; 3183 3184 M = *newmat; 3185 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3186 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3187 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3188 /* 3189 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3190 rather than the slower MatSetValues(). 3191 */ 3192 M->was_assembled = PETSC_TRUE; 3193 M->assembled = PETSC_FALSE; 3194 } 3195 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3196 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3197 ii = aij->i; 3198 jj = aij->j; 3199 aa = aij->a; 3200 for (i=0; i<m; i++) { 3201 row = rstart + i; 3202 nz = ii[i+1] - ii[i]; 3203 cwork = jj; jj += nz; 3204 vwork = aa; aa += nz; 3205 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3206 } 3207 3208 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3209 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3210 *newmat = M; 3211 3212 /* save submatrix used in processor for next request */ 3213 if (call == MAT_INITIAL_MATRIX) { 3214 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3215 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3216 } 3217 PetscFunctionReturn(0); 3218 } 3219 3220 #undef __FUNCT__ 3221 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3222 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3223 { 3224 PetscInt m,cstart, cend,j,nnz,i,d; 3225 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3226 const PetscInt *JJ; 3227 PetscScalar *values; 3228 PetscErrorCode ierr; 3229 3230 PetscFunctionBegin; 3231 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3232 3233 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3234 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3235 m = B->rmap->n; 3236 cstart = B->cmap->rstart; 3237 cend = B->cmap->rend; 3238 rstart = B->rmap->rstart; 3239 3240 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3241 3242 #if defined(PETSC_USE_DEBUG) 3243 for (i=0; i<m; i++) { 3244 nnz = Ii[i+1]- Ii[i]; 3245 JJ = J + Ii[i]; 3246 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3247 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index %D",i,JJ[0]); 3248 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3249 } 3250 #endif 3251 3252 for (i=0; i<m; i++) { 3253 nnz = Ii[i+1]- Ii[i]; 3254 JJ = J + Ii[i]; 3255 nnz_max = PetscMax(nnz_max,nnz); 3256 d = 0; 3257 for (j=0; j<nnz; j++) { 3258 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3259 } 3260 d_nnz[i] = d; 3261 o_nnz[i] = nnz - d; 3262 } 3263 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3264 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3265 3266 if (v) values = (PetscScalar*)v; 3267 else { 3268 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3269 } 3270 3271 for (i=0; i<m; i++) { 3272 ii = i + rstart; 3273 nnz = Ii[i+1]- Ii[i]; 3274 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3275 } 3276 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3277 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3278 3279 if (!v) { 3280 ierr = PetscFree(values);CHKERRQ(ierr); 3281 } 3282 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3283 PetscFunctionReturn(0); 3284 } 3285 3286 #undef __FUNCT__ 3287 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3288 /*@ 3289 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3290 (the default parallel PETSc format). 3291 3292 Collective on MPI_Comm 3293 3294 Input Parameters: 3295 + B - the matrix 3296 . i - the indices into j for the start of each local row (starts with zero) 3297 . j - the column indices for each local row (starts with zero) 3298 - v - optional values in the matrix 3299 3300 Level: developer 3301 3302 Notes: 3303 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3304 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3305 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3306 3307 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3308 3309 The format which is used for the sparse matrix input, is equivalent to a 3310 row-major ordering.. i.e for the following matrix, the input data expected is 3311 as shown 3312 3313 $ 1 0 0 3314 $ 2 0 3 P0 3315 $ ------- 3316 $ 4 5 6 P1 3317 $ 3318 $ Process0 [P0]: rows_owned=[0,1] 3319 $ i = {0,1,3} [size = nrow+1 = 2+1] 3320 $ j = {0,0,2} [size = 3] 3321 $ v = {1,2,3} [size = 3] 3322 $ 3323 $ Process1 [P1]: rows_owned=[2] 3324 $ i = {0,3} [size = nrow+1 = 1+1] 3325 $ j = {0,1,2} [size = 3] 3326 $ v = {4,5,6} [size = 3] 3327 3328 .keywords: matrix, aij, compressed row, sparse, parallel 3329 3330 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3331 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3332 @*/ 3333 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3334 { 3335 PetscErrorCode ierr; 3336 3337 PetscFunctionBegin; 3338 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3339 PetscFunctionReturn(0); 3340 } 3341 3342 #undef __FUNCT__ 3343 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3344 /*@C 3345 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3346 (the default parallel PETSc format). For good matrix assembly performance 3347 the user should preallocate the matrix storage by setting the parameters 3348 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3349 performance can be increased by more than a factor of 50. 3350 3351 Collective on MPI_Comm 3352 3353 Input Parameters: 3354 + B - the matrix 3355 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3356 (same value is used for all local rows) 3357 . d_nnz - array containing the number of nonzeros in the various rows of the 3358 DIAGONAL portion of the local submatrix (possibly different for each row) 3359 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3360 The size of this array is equal to the number of local rows, i.e 'm'. 
3361 For matrices that will be factored, you must leave room for (and set) 3362 the diagonal entry even if it is zero. 3363 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3364 submatrix (same value is used for all local rows). 3365 - o_nnz - array containing the number of nonzeros in the various rows of the 3366 OFF-DIAGONAL portion of the local submatrix (possibly different for 3367 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3368 structure. The size of this array is equal to the number 3369 of local rows, i.e 'm'. 3370 3371 If the *_nnz parameter is given then the *_nz parameter is ignored 3372 3373 The AIJ format (also called the Yale sparse matrix format or 3374 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3375 storage. The stored row and column indices begin with zero. 3376 See Users-Manual: ch_mat for details. 3377 3378 The parallel matrix is partitioned such that the first m0 rows belong to 3379 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3380 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3381 3382 The DIAGONAL portion of the local submatrix of a processor can be defined 3383 as the submatrix which is obtained by extraction the part corresponding to 3384 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3385 first row that belongs to the processor, r2 is the last row belonging to 3386 the this processor, and c1-c2 is range of indices of the local part of a 3387 vector suitable for applying the matrix to. This is an mxn matrix. In the 3388 common case of a square matrix, the row and column ranges are the same and 3389 the DIAGONAL part is also square. The remaining portion of the local 3390 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3391 3392 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3393 3394 You can call MatGetInfo() to get information on how effective the preallocation was; 3395 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3396 You can also run with the option -info and look for messages with the string 3397 malloc in them to see if additional memory allocation was needed. 3398 3399 Example usage: 3400 3401 Consider the following 8x8 matrix with 34 non-zero values, that is 3402 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3403 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3404 as follows: 3405 3406 .vb 3407 1 2 0 | 0 3 0 | 0 4 3408 Proc0 0 5 6 | 7 0 0 | 8 0 3409 9 0 10 | 11 0 0 | 12 0 3410 ------------------------------------- 3411 13 0 14 | 15 16 17 | 0 0 3412 Proc1 0 18 0 | 19 20 21 | 0 0 3413 0 0 0 | 22 23 0 | 24 0 3414 ------------------------------------- 3415 Proc2 25 26 27 | 0 0 28 | 29 0 3416 30 0 0 | 31 32 33 | 0 34 3417 .ve 3418 3419 This can be represented as a collection of submatrices as: 3420 3421 .vb 3422 A B C 3423 D E F 3424 G H I 3425 .ve 3426 3427 Where the submatrices A,B,C are owned by proc0, D,E,F are 3428 owned by proc1, G,H,I are owned by proc2. 3429 3430 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3431 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3432 The 'M','N' parameters are 8,8, and have the same values on all procs. 3433 3434 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3435 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3436 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3437 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3438 part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ 3439 matrix, and [DF] as another SeqAIJ matrix. 3440 3441 When d_nz, o_nz parameters are specified, d_nz storage elements are 3442 allocated for every row of the local diagonal submatrix, and o_nz 3443 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3444 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3445 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3446 In this case, the values of d_nz,o_nz are: 3447 .vb 3448 proc0 : d_nz = 2, o_nz = 2 3449 proc1 : d_nz = 3, o_nz = 2 3450 proc2 : d_nz = 1, o_nz = 4 3451 .ve 3452 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3453 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3454 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3455 34 values. 3456 3457 When d_nnz, o_nnz parameters are specified, the storage is specified 3458 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3459 In the above case the values for d_nnz,o_nnz are: 3460 .vb 3461 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3462 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3463 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3464 .ve 3465 Here the space allocated is the sum of all the above values, i.e. 34, and 3466 hence pre-allocation is perfect. 3467 3468 Level: intermediate 3469 3470 .keywords: matrix, aij, compressed row, sparse, parallel 3471 3472 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3473 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3474 @*/ 3475 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3476 { 3477 PetscErrorCode ierr; 3478 3479 PetscFunctionBegin; 3480 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3481 PetscValidType(B,1); 3482 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3483 PetscFunctionReturn(0); 3484 } 3485 3486 #undef __FUNCT__ 3487 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3488 /*@ 3489 MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows in standard 3490 CSR format. 3491 3492 Collective on MPI_Comm 3493 3494 Input Parameters: 3495 + comm - MPI communicator 3496 . m - number of local rows (Cannot be PETSC_DECIDE) 3497 . n - This value should be the same as the local size used in creating the 3498 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3499 calculated if N is given) For square matrices n is almost always m. 3500 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3501 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3502 . i - row indices 3503 . j - column indices 3504 - a - matrix values 3505 3506 Output Parameter: 3507 . mat - the matrix 3508 3509 Level: intermediate 3510 3511 Notes: 3512 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3513 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3514 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3515 3516 The i and j indices are 0 based; the entries of i are offsets into the local j array.
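   A minimal calling sketch (here m, N, i, j, and a are illustrative names for data already set up on each process with that process's rows) is:
.vb
     Mat A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,&A);
     /* i, j, and a may be freed or reused right away, since they were copied */
     MatDestroy(&A);
.ve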
3517 3518 The format which is used for the sparse matrix input, is equivalent to a 3519 row-major ordering.. i.e for the following matrix, the input data expected is 3520 as shown 3521 3522 $ 1 0 0 3523 $ 2 0 3 P0 3524 $ ------- 3525 $ 4 5 6 P1 3526 $ 3527 $ Process0 [P0]: rows_owned=[0,1] 3528 $ i = {0,1,3} [size = nrow+1 = 2+1] 3529 $ j = {0,0,2} [size = 3] 3530 $ v = {1,2,3} [size = 3] 3531 $ 3532 $ Process1 [P1]: rows_owned=[2] 3533 $ i = {0,3} [size = nrow+1 = 1+1] 3534 $ j = {0,1,2} [size = 3] 3535 $ v = {4,5,6} [size = 3] 3536 3537 .keywords: matrix, aij, compressed row, sparse, parallel 3538 3539 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3540 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3541 @*/ 3542 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3543 { 3544 PetscErrorCode ierr; 3545 3546 PetscFunctionBegin; 3547 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3548 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3549 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3550 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3551 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3552 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3553 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3554 PetscFunctionReturn(0); 3555 } 3556 3557 #undef __FUNCT__ 3558 #define __FUNCT__ "MatCreateAIJ" 3559 /*@C 3560 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3561 (the default parallel PETSc format). For good matrix assembly performance 3562 the user should preallocate the matrix storage by setting the parameters 3563 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3564 performance can be increased by more than a factor of 50. 3565 3566 Collective on MPI_Comm 3567 3568 Input Parameters: 3569 + comm - MPI communicator 3570 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3571 This value should be the same as the local size used in creating the 3572 y vector for the matrix-vector product y = Ax. 3573 . n - This value should be the same as the local size used in creating the 3574 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3575 calculated if N is given) For square matrices n is almost always m. 3576 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3577 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3578 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3579 (same value is used for all local rows) 3580 . d_nnz - array containing the number of nonzeros in the various rows of the 3581 DIAGONAL portion of the local submatrix (possibly different for each row) 3582 or NULL, if d_nz is used to specify the nonzero structure. 3583 The size of this array is equal to the number of local rows, i.e 'm'. 3584 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3585 submatrix (same value is used for all local rows). 3586 - o_nnz - array containing the number of nonzeros in the various rows of the 3587 OFF-DIAGONAL portion of the local submatrix (possibly different for 3588 each row) or NULL, if o_nz is used to specify the nonzero 3589 structure. 
The size of this array is equal to the number 3590 of local rows, i.e 'm'. 3591 3592 Output Parameter: 3593 . A - the matrix 3594 3595 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3596 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3597 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3598 3599 Notes: 3600 If the *_nnz parameter is given then the *_nz parameter is ignored 3601 3602 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3603 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3604 storage requirements for this matrix. 3605 3606 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3607 processor than it must be used on all processors that share the object for 3608 that argument. 3609 3610 The user MUST specify either the local or global matrix dimensions 3611 (possibly both). 3612 3613 The parallel matrix is partitioned across processors such that the 3614 first m0 rows belong to process 0, the next m1 rows belong to 3615 process 1, the next m2 rows belong to process 2 etc.. where 3616 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3617 values corresponding to [m x N] submatrix. 3618 3619 The columns are logically partitioned with the n0 columns belonging 3620 to 0th partition, the next n1 columns belonging to the next 3621 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3622 3623 The DIAGONAL portion of the local submatrix on any given processor 3624 is the submatrix corresponding to the rows and columns m,n 3625 corresponding to the given processor. i.e diagonal matrix on 3626 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3627 etc. The remaining portion of the local submatrix [m x (N-n)] 3628 constitute the OFF-DIAGONAL portion. The example below better 3629 illustrates this concept. 3630 3631 For a square global matrix we define each processor's diagonal portion 3632 to be its local rows and the corresponding columns (a square submatrix); 3633 each processor's off-diagonal portion encompasses the remainder of the 3634 local matrix (a rectangular submatrix). 3635 3636 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3637 3638 When calling this routine with a single process communicator, a matrix of 3639 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3640 type of communicator, use the construction mechanism: 3641 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3642 3643 By default, this format uses inodes (identical nodes) when possible. 3644 We search for consecutive rows with the same nonzero structure, thereby 3645 reusing matrix information to achieve increased efficiency. 3646 3647 Options Database Keys: 3648 + -mat_no_inode - Do not use inodes 3649 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3650 - -mat_aij_oneindex - Internally use indexing starting at 1 3651 rather than 0. Note that when calling MatSetValues(), 3652 the user still MUST index entries starting at 0! 3653 3654 3655 Example usage: 3656 3657 Consider the following 8x8 matrix with 34 non-zero values, that is 3658 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3659 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3660 as follows: 3661 3662 .vb 3663 1 2 0 | 0 3 0 | 0 4 3664 Proc0 0 5 6 | 7 0 0 | 8 0 3665 9 0 10 | 11 0 0 | 12 0 3666 ------------------------------------- 3667 13 0 14 | 15 16 17 | 0 0 3668 Proc1 0 18 0 | 19 20 21 | 0 0 3669 0 0 0 | 22 23 0 | 24 0 3670 ------------------------------------- 3671 Proc2 25 26 27 | 0 0 28 | 29 0 3672 30 0 0 | 31 32 33 | 0 34 3673 .ve 3674 3675 This can be represented as a collection of submatrices as: 3676 3677 .vb 3678 A B C 3679 D E F 3680 G H I 3681 .ve 3682 3683 Where the submatrices A,B,C are owned by proc0, D,E,F are 3684 owned by proc1, G,H,I are owned by proc2. 3685 3686 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3687 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3688 The 'M','N' parameters are 8,8, and have the same values on all procs. 3689 3690 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3691 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3692 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3693 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3694 part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ 3695 matrix, and [DF] as another SeqAIJ matrix. 3696 3697 When d_nz, o_nz parameters are specified, d_nz storage elements are 3698 allocated for every row of the local diagonal submatrix, and o_nz 3699 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3700 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3701 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3702 In this case, the values of d_nz,o_nz are: 3703 .vb 3704 proc0 : d_nz = 2, o_nz = 2 3705 proc1 : d_nz = 3, o_nz = 2 3706 proc2 : d_nz = 1, o_nz = 4 3707 .ve 3708 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3709 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3710 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3711 34 values. 3712 3713 When d_nnz, o_nnz parameters are specified, the storage is specified 3714 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3715 In the above case the values for d_nnz,o_nnz are: 3716 .vb 3717 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3718 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3719 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3720 .ve 3721 Here the space allocated is the sum of all the above values, i.e. 34, and 3722 hence pre-allocation is perfect.
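   For the example above, the call made on proc1 could look like the following sketch (error checking omitted; the array names are illustrative):
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat      A;
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* followed by MatSetValues(), MatAssemblyBegin() and MatAssemblyEnd() */
.ve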
3723 3724 Level: intermediate 3725 3726 .keywords: matrix, aij, compressed row, sparse, parallel 3727 3728 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3729 MPIAIJ, MatCreateMPIAIJWithArrays() 3730 @*/ 3731 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3732 { 3733 PetscErrorCode ierr; 3734 PetscMPIInt size; 3735 3736 PetscFunctionBegin; 3737 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3738 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3739 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3740 if (size > 1) { 3741 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3742 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3743 } else { 3744 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3745 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3746 } 3747 PetscFunctionReturn(0); 3748 } 3749 3750 #undef __FUNCT__ 3751 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3752 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3753 { 3754 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3755 PetscBool flg; 3756 PetscErrorCode ierr; 3757 3758 PetscFunctionBegin; 3759 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3760 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input"); 3761 if (Ad) *Ad = a->A; 3762 if (Ao) *Ao = a->B; 3763 if (colmap) *colmap = a->garray; 3764 PetscFunctionReturn(0); 3765 } 3766 3767 #undef __FUNCT__ 3768 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3769 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3770 { 3771 PetscErrorCode ierr; 3772 PetscInt i; 3773 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3774 3775 PetscFunctionBegin; 3776 if (coloring->ctype == IS_COLORING_GLOBAL) { 3777 ISColoringValue *allcolors,*colors; 3778 ISColoring ocoloring; 3779 3780 /* set coloring for diagonal portion */ 3781 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3782 3783 /* set coloring for off-diagonal portion */ 3784 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3785 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3786 for (i=0; i<a->B->cmap->n; i++) { 3787 colors[i] = allcolors[a->garray[i]]; 3788 } 3789 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3790 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3791 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3792 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3793 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3794 ISColoringValue *colors; 3795 PetscInt *larray; 3796 ISColoring ocoloring; 3797 3798 /* set coloring for diagonal portion */ 3799 ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr); 3800 for (i=0; i<a->A->cmap->n; i++) { 3801 larray[i] = i + A->cmap->rstart; 3802 } 3803 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3804 ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr); 3805 for (i=0; i<a->A->cmap->n; i++) { 3806 colors[i] = coloring->colors[larray[i]]; 3807 } 3808 ierr = PetscFree(larray);CHKERRQ(ierr); 3809 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3810 ierr = 
MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3811 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3812 3813 /* set coloring for off-diagonal portion */ 3814 ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr); 3815 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3816 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3817 for (i=0; i<a->B->cmap->n; i++) { 3818 colors[i] = coloring->colors[larray[i]]; 3819 } 3820 ierr = PetscFree(larray);CHKERRQ(ierr); 3821 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3822 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3823 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3824 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3825 PetscFunctionReturn(0); 3826 } 3827 3828 #undef __FUNCT__ 3829 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3830 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3831 { 3832 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3833 PetscErrorCode ierr; 3834 3835 PetscFunctionBegin; 3836 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3837 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3838 PetscFunctionReturn(0); 3839 } 3840 3841 #undef __FUNCT__ 3842 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3843 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3844 { 3845 PetscErrorCode ierr; 3846 PetscInt m,N,i,rstart,nnz,Ii; 3847 PetscInt *indx; 3848 PetscScalar *values; 3849 3850 PetscFunctionBegin; 3851 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3852 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3853 PetscInt *dnz,*onz,sum,bs,cbs; 3854 3855 if (n == PETSC_DECIDE) { 3856 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3857 } 3858 /* Check sum(n) = N */ 3859 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3860 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3861 3862 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3863 rstart -= m; 3864 3865 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3866 for (i=0; i<m; i++) { 3867 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3868 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3869 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3870 } 3871 3872 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3873 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3874 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3875 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3876 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3877 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3878 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3879 } 3880 3881 /* numeric phase */ 3882 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3883 for (i=0; i<m; i++) { 3884 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3885 Ii = i + rstart; 3886 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3887 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3888 } 3889 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3890 ierr = 
MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3891 PetscFunctionReturn(0); 3892 } 3893 3894 #undef __FUNCT__ 3895 #define __FUNCT__ "MatFileSplit" 3896 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3897 { 3898 PetscErrorCode ierr; 3899 PetscMPIInt rank; 3900 PetscInt m,N,i,rstart,nnz; 3901 size_t len; 3902 const PetscInt *indx; 3903 PetscViewer out; 3904 char *name; 3905 Mat B; 3906 const PetscScalar *values; 3907 3908 PetscFunctionBegin; 3909 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3910 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3911 /* Should this be the type of the diagonal block of A? */ 3912 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3913 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3914 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3915 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3916 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3917 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3918 for (i=0; i<m; i++) { 3919 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3920 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3921 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3922 } 3923 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3924 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3925 3926 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3927 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3928 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3929 sprintf(name,"%s.%d",outfile,rank); 3930 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3931 ierr = PetscFree(name);CHKERRQ(ierr); 3932 ierr = MatView(B,out);CHKERRQ(ierr); 3933 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3934 ierr = MatDestroy(&B);CHKERRQ(ierr); 3935 PetscFunctionReturn(0); 3936 } 3937 3938 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3939 #undef __FUNCT__ 3940 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3941 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3942 { 3943 PetscErrorCode ierr; 3944 Mat_Merge_SeqsToMPI *merge; 3945 PetscContainer container; 3946 3947 PetscFunctionBegin; 3948 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3949 if (container) { 3950 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3951 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3952 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3953 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3954 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3955 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3956 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3957 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3958 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3959 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3960 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3961 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3962 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3963 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3964 ierr = PetscFree(merge);CHKERRQ(ierr); 3965 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3966 } 3967 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3968 PetscFunctionReturn(0); 3969 } 3970 3971 #include <../src/mat/utils/freespace.h> 3972 #include <petscbt.h> 3973 3974 #undef __FUNCT__ 3975 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3976 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3977 { 3978 
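/* Numeric phase of the sequential-to-MPI merge: using the symbolic information attached to mpimat by MatCreateMPIAIJSumSeqAIJSymbolic(), each process sends the values of the rows of its sequential matrix that are owned by other processes, accumulates its own and the received contributions row by row, and inserts the result into mpimat with MatSetValues() before assembling. */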
PetscErrorCode ierr; 3979 MPI_Comm comm; 3980 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3981 PetscMPIInt size,rank,taga,*len_s; 3982 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3983 PetscInt proc,m; 3984 PetscInt **buf_ri,**buf_rj; 3985 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3986 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3987 MPI_Request *s_waits,*r_waits; 3988 MPI_Status *status; 3989 MatScalar *aa=a->a; 3990 MatScalar **abuf_r,*ba_i; 3991 Mat_Merge_SeqsToMPI *merge; 3992 PetscContainer container; 3993 3994 PetscFunctionBegin; 3995 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3996 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3997 3998 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3999 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4000 4001 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4002 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4003 4004 bi = merge->bi; 4005 bj = merge->bj; 4006 buf_ri = merge->buf_ri; 4007 buf_rj = merge->buf_rj; 4008 4009 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4010 owners = merge->rowmap->range; 4011 len_s = merge->len_s; 4012 4013 /* send and recv matrix values */ 4014 /*-----------------------------*/ 4015 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4016 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4017 4018 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4019 for (proc=0,k=0; proc<size; proc++) { 4020 if (!len_s[proc]) continue; 4021 i = owners[proc]; 4022 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4023 k++; 4024 } 4025 4026 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4027 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4028 ierr = PetscFree(status);CHKERRQ(ierr); 4029 4030 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4031 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4032 4033 /* insert mat values of mpimat */ 4034 /*----------------------------*/ 4035 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4036 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4037 4038 for (k=0; k<merge->nrecv; k++) { 4039 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4040 nrows = *(buf_ri_k[k]); 4041 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4042 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4043 } 4044 4045 /* set values of ba */ 4046 m = merge->rowmap->n; 4047 for (i=0; i<m; i++) { 4048 arow = owners[rank] + i; 4049 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4050 bnzi = bi[i+1] - bi[i]; 4051 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4052 4053 /* add local non-zero vals of this proc's seqmat into ba */ 4054 anzi = ai[arow+1] - ai[arow]; 4055 aj = a->j + ai[arow]; 4056 aa = a->a + ai[arow]; 4057 nextaj = 0; 4058 for (j=0; nextaj<anzi; j++) { 4059 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4060 ba_i[j] += aa[nextaj++]; 4061 } 4062 } 4063 4064 /* add received vals into ba */ 4065 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4066 /* i-th row */ 4067 if (i == *nextrow[k]) { 4068 anzi = *(nextai[k]+1) - *nextai[k]; 4069 aj = buf_rj[k] + *(nextai[k]); 4070 aa = abuf_r[k] + 
*(nextai[k]); 4071 nextaj = 0; 4072 for (j=0; nextaj<anzi; j++) { 4073 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4074 ba_i[j] += aa[nextaj++]; 4075 } 4076 } 4077 nextrow[k]++; nextai[k]++; 4078 } 4079 } 4080 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4081 } 4082 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4083 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4084 4085 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4086 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4087 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4088 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4089 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4090 PetscFunctionReturn(0); 4091 } 4092 4093 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4094 4095 #undef __FUNCT__ 4096 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4097 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4098 { 4099 PetscErrorCode ierr; 4100 Mat B_mpi; 4101 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4102 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4103 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4104 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4105 PetscInt len,proc,*dnz,*onz,bs,cbs; 4106 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4107 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4108 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4109 MPI_Status *status; 4110 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4111 PetscBT lnkbt; 4112 Mat_Merge_SeqsToMPI *merge; 4113 PetscContainer container; 4114 4115 PetscFunctionBegin; 4116 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4117 4118 /* make sure it is a PETSc comm */ 4119 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4120 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4121 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4122 4123 ierr = PetscNew(&merge);CHKERRQ(ierr); 4124 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4125 4126 /* determine row ownership */ 4127 /*---------------------------------------------------------*/ 4128 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4129 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4130 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4131 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4132 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4133 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4134 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4135 4136 m = merge->rowmap->n; 4137 owners = merge->rowmap->range; 4138 4139 /* determine the number of messages to send, their lengths */ 4140 /*---------------------------------------------------------*/ 4141 len_s = merge->len_s; 4142 4143 len = 0; /* length of buf_si[] */ 4144 merge->nsend = 0; 4145 for (proc=0; proc<size; proc++) { 4146 len_si[proc] = 0; 4147 if (proc == rank) { 4148 len_s[proc] = 0; 4149 } else { 4150 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4151 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4152 } 4153 if (len_s[proc]) { 4154 merge->nsend++; 4155 nrows = 0; 4156 for (i=owners[proc]; i<owners[proc+1]; i++) { 4157 if (ai[i+1] > ai[i]) nrows++; 4158 } 4159 len_si[proc] = 2*(nrows+1); 4160 len += len_si[proc]; 4161 } 4162 } 4163 4164 /* determine the number and length of messages to receive for 
ij-structure */ 4165 /*-------------------------------------------------------------------------*/ 4166 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4167 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4168 4169 /* post the Irecv of j-structure */ 4170 /*-------------------------------*/ 4171 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4172 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4173 4174 /* post the Isend of j-structure */ 4175 /*--------------------------------*/ 4176 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4177 4178 for (proc=0, k=0; proc<size; proc++) { 4179 if (!len_s[proc]) continue; 4180 i = owners[proc]; 4181 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4182 k++; 4183 } 4184 4185 /* receives and sends of j-structure are complete */ 4186 /*------------------------------------------------*/ 4187 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4188 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4189 4190 /* send and recv i-structure */ 4191 /*---------------------------*/ 4192 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4193 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4194 4195 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4196 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4197 for (proc=0,k=0; proc<size; proc++) { 4198 if (!len_s[proc]) continue; 4199 /* form outgoing message for i-structure: 4200 buf_si[0]: nrows to be sent 4201 [1:nrows]: row index (global) 4202 [nrows+1:2*nrows+1]: i-structure index 4203 */ 4204 /*-------------------------------------------*/ 4205 nrows = len_si[proc]/2 - 1; 4206 buf_si_i = buf_si + nrows+1; 4207 buf_si[0] = nrows; 4208 buf_si_i[0] = 0; 4209 nrows = 0; 4210 for (i=owners[proc]; i<owners[proc+1]; i++) { 4211 anzi = ai[i+1] - ai[i]; 4212 if (anzi) { 4213 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4214 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4215 nrows++; 4216 } 4217 } 4218 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4219 k++; 4220 buf_si += len_si[proc]; 4221 } 4222 4223 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4224 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4225 4226 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4227 for (i=0; i<merge->nrecv; i++) { 4228 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4229 } 4230 4231 ierr = PetscFree(len_si);CHKERRQ(ierr); 4232 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4233 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4234 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4235 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4236 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4237 ierr = PetscFree(status);CHKERRQ(ierr); 4238 4239 /* compute a local seq matrix in each processor */ 4240 /*----------------------------------------------*/ 4241 /* allocate bi array and free space for accumulating nonzero column info */ 4242 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4243 bi[0] = 0; 4244 4245 /* create and initialize a 
linked list */ 4246 nlnk = N+1; 4247 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4248 4249 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4250 len = ai[owners[rank+1]] - ai[owners[rank]]; 4251 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4252 4253 current_space = free_space; 4254 4255 /* determine symbolic info for each local row */ 4256 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4257 4258 for (k=0; k<merge->nrecv; k++) { 4259 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4260 nrows = *buf_ri_k[k]; 4261 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4262 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4263 } 4264 4265 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4266 len = 0; 4267 for (i=0; i<m; i++) { 4268 bnzi = 0; 4269 /* add local non-zero cols of this proc's seqmat into lnk */ 4270 arow = owners[rank] + i; 4271 anzi = ai[arow+1] - ai[arow]; 4272 aj = a->j + ai[arow]; 4273 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4274 bnzi += nlnk; 4275 /* add received col data into lnk */ 4276 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4277 if (i == *nextrow[k]) { /* i-th row */ 4278 anzi = *(nextai[k]+1) - *nextai[k]; 4279 aj = buf_rj[k] + *nextai[k]; 4280 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4281 bnzi += nlnk; 4282 nextrow[k]++; nextai[k]++; 4283 } 4284 } 4285 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4286 4287 /* if free space is not available, make more free space */ 4288 if (current_space->local_remaining<bnzi) { 4289 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4290 nspacedouble++; 4291 } 4292 /* copy data into free space, then initialize lnk */ 4293 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4294 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4295 4296 current_space->array += bnzi; 4297 current_space->local_used += bnzi; 4298 current_space->local_remaining -= bnzi; 4299 4300 bi[i+1] = bi[i] + bnzi; 4301 } 4302 4303 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4304 4305 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4306 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4307 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4308 4309 /* create symbolic parallel matrix B_mpi */ 4310 /*---------------------------------------*/ 4311 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4312 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4313 if (n==PETSC_DECIDE) { 4314 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4315 } else { 4316 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4317 } 4318 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4319 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4320 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4321 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4322 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4323 4324 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4325 B_mpi->assembled = PETSC_FALSE; 4326 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4327 merge->bi = bi; 4328 merge->bj = bj; 4329 merge->buf_ri =
buf_ri; 4330 merge->buf_rj = buf_rj; 4331 merge->coi = NULL; 4332 merge->coj = NULL; 4333 merge->owners_co = NULL; 4334 4335 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4336 4337 /* attach the supporting struct to B_mpi for reuse */ 4338 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4339 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4340 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4341 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4342 *mpimat = B_mpi; 4343 4344 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4345 PetscFunctionReturn(0); 4346 } 4347 4348 #undef __FUNCT__ 4349 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4350 /*@C 4351 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4352 matrices from each processor 4353 4354 Collective on MPI_Comm 4355 4356 Input Parameters: 4357 + comm - the communicators the parallel matrix will live on 4358 . seqmat - the input sequential matrices 4359 . m - number of local rows (or PETSC_DECIDE) 4360 . n - number of local columns (or PETSC_DECIDE) 4361 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4362 4363 Output Parameter: 4364 . mpimat - the parallel matrix generated 4365 4366 Level: advanced 4367 4368 Notes: 4369 The dimensions of the sequential matrix in each processor MUST be the same. 4370 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4371 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4372 @*/ 4373 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4374 { 4375 PetscErrorCode ierr; 4376 PetscMPIInt size; 4377 4378 PetscFunctionBegin; 4379 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4380 if (size == 1) { 4381 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4382 if (scall == MAT_INITIAL_MATRIX) { 4383 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4384 } else { 4385 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4386 } 4387 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4388 PetscFunctionReturn(0); 4389 } 4390 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4391 if (scall == MAT_INITIAL_MATRIX) { 4392 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4393 } 4394 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4395 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4396 PetscFunctionReturn(0); 4397 } 4398 4399 #undef __FUNCT__ 4400 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4401 /*@ 4402 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4403 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4404 with MatGetSize() 4405 4406 Not Collective 4407 4408 Input Parameters: 4409 + A - the matrix 4410 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4411 4412 Output Parameter: 4413 . 
A_loc - the local sequential matrix generated 4414 4415 Level: developer 4416 4417 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4418 4419 @*/ 4420 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4421 { 4422 PetscErrorCode ierr; 4423 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4424 Mat_SeqAIJ *mat,*a,*b; 4425 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4426 MatScalar *aa,*ba,*cam; 4427 PetscScalar *ca; 4428 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4429 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4430 PetscBool match; 4431 MPI_Comm comm; 4432 PetscMPIInt size; 4433 4434 PetscFunctionBegin; 4435 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4436 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4437 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4438 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4439 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4440 4441 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4442 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4443 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4444 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4445 aa = a->a; ba = b->a; 4446 if (scall == MAT_INITIAL_MATRIX) { 4447 if (size == 1) { 4448 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4449 PetscFunctionReturn(0); 4450 } 4451 4452 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4453 ci[0] = 0; 4454 for (i=0; i<am; i++) { 4455 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4456 } 4457 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4458 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4459 k = 0; 4460 for (i=0; i<am; i++) { 4461 ncols_o = bi[i+1] - bi[i]; 4462 ncols_d = ai[i+1] - ai[i]; 4463 /* off-diagonal portion of A: columns left of the diagonal block */ 4464 for (jo=0; jo<ncols_o; jo++) { 4465 col = cmap[*bj]; 4466 if (col >= cstart) break; 4467 cj[k] = col; bj++; 4468 ca[k++] = *ba++; 4469 } 4470 /* diagonal portion of A */ 4471 for (j=0; j<ncols_d; j++) { 4472 cj[k] = cstart + *aj++; 4473 ca[k++] = *aa++; 4474 } 4475 /* off-diagonal portion of A: columns right of the diagonal block */ 4476 for (j=jo; j<ncols_o; j++) { 4477 cj[k] = cmap[*bj++]; 4478 ca[k++] = *ba++; 4479 } 4480 } 4481 /* put together the new matrix */ 4482 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4483 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4484 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4485 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4486 mat->free_a = PETSC_TRUE; 4487 mat->free_ij = PETSC_TRUE; 4488 mat->nonew = 0; 4489 } else if (scall == MAT_REUSE_MATRIX) { 4490 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4491 ci = mat->i; cj = mat->j; cam = mat->a; 4492 for (i=0; i<am; i++) { 4493 /* off-diagonal portion of A */ 4494 ncols_o = bi[i+1] - bi[i]; 4495 for (jo=0; jo<ncols_o; jo++) { 4496 col = cmap[*bj]; 4497 if (col >= cstart) break; 4498 *cam++ = *ba++; bj++; 4499 } 4500 /* diagonal portion of A */ 4501 ncols_d = ai[i+1] - ai[i]; 4502 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4503 /* off-diagonal portion of A */ 4504 for (j=jo; j<ncols_o; j++) { 4505 *cam++ = *ba++; bj++; 4506 } 4507 } 4508 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4509 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 #undef __FUNCT__ 4514 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4515 /*@C 4516 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4517 4518 Not Collective 4519 4520 Input Parameters: 4521 + A - the matrix 4522 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4523 - row, col - index sets of rows and columns to extract (or NULL) 4524 4525 Output Parameter: 4526 . A_loc - the local sequential matrix generated 4527 4528 Level: developer 4529 4530 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4531 4532 @*/ 4533 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4534 { 4535 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4536 PetscErrorCode ierr; 4537 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4538 IS isrowa,iscola; 4539 Mat *aloc; 4540 PetscBool match; 4541 4542 PetscFunctionBegin; 4543 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4544 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4545 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4546 if (!row) { 4547 start = A->rmap->rstart; end = A->rmap->rend; 4548 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4549 } else { 4550 isrowa = *row; 4551 } 4552 if (!col) { 4553 start = A->cmap->rstart; 4554 cmap = a->garray; 4555 nzA = a->A->cmap->n; 4556 nzB = a->B->cmap->n; 4557 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4558 ncols = 0; 4559 for (i=0; i<nzB; i++) { 4560 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4561 else break; 4562 } 4563 imark = i; 4564 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4565 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4566 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4567 } else { 4568 iscola = *col; 4569 } 4570 if (scall != MAT_INITIAL_MATRIX) { 4571 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4572 aloc[0] = *A_loc; 4573 } 4574 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4575 *A_loc = aloc[0]; 4576 ierr = PetscFree(aloc);CHKERRQ(ierr); 4577 if (!row) { 4578 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4579 } 4580 if (!col) { 4581 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4582 } 4583 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4584 PetscFunctionReturn(0); 4585 } 4586 4587 #undef __FUNCT__ 4588 #define __FUNCT__ "MatGetBrowsOfAcols" 4589 /*@C 4590 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero 
columns of local A 4591 4592 Collective on Mat 4593 4594 Input Parameters: 4595 + A,B - the matrices in mpiaij format 4596 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4597 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4598 4599 Output Parameter: 4600 + rowb, colb - index sets of rows and columns of B to extract 4601 - B_seq - the sequential matrix generated 4602 4603 Level: developer 4604 4605 @*/ 4606 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4607 { 4608 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4609 PetscErrorCode ierr; 4610 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4611 IS isrowb,iscolb; 4612 Mat *bseq=NULL; 4613 4614 PetscFunctionBegin; 4615 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4616 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4617 } 4618 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4619 4620 if (scall == MAT_INITIAL_MATRIX) { 4621 start = A->cmap->rstart; 4622 cmap = a->garray; 4623 nzA = a->A->cmap->n; 4624 nzB = a->B->cmap->n; 4625 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4626 ncols = 0; 4627 for (i=0; i<nzB; i++) { /* row < local row index */ 4628 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4629 else break; 4630 } 4631 imark = i; 4632 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4633 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4634 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4635 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4636 } else { 4637 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4638 isrowb = *rowb; iscolb = *colb; 4639 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4640 bseq[0] = *B_seq; 4641 } 4642 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4643 *B_seq = bseq[0]; 4644 ierr = PetscFree(bseq);CHKERRQ(ierr); 4645 if (!rowb) { 4646 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4647 } else { 4648 *rowb = isrowb; 4649 } 4650 if (!colb) { 4651 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4652 } else { 4653 *colb = iscolb; 4654 } 4655 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4656 PetscFunctionReturn(0); 4657 } 4658 4659 #undef __FUNCT__ 4660 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4661 /* 4662 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4663 of the OFF-DIAGONAL portion of local A 4664 4665 Collective on Mat 4666 4667 Input Parameters: 4668 + A,B - the matrices in mpiaij format 4669 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4670 4671 Output Parameter: 4672 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4673 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4674 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4675 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4676 4677 Level: developer 4678 4679 */ 4680 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4681 { 4682 VecScatter_MPI_General *gen_to,*gen_from; 4683 PetscErrorCode ierr; 4684 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4685 Mat_SeqAIJ *b_oth; 4686 VecScatter ctx =a->Mvctx; 4687 MPI_Comm comm; 4688 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4689 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4690 PetscScalar *rvalues,*svalues; 4691 MatScalar *b_otha,*bufa,*bufA; 4692 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4693 MPI_Request *rwaits = NULL,*swaits = NULL; 4694 MPI_Status *sstatus,rstatus; 4695 PetscMPIInt jj,size; 4696 PetscInt *cols,sbs,rbs; 4697 PetscScalar *vals; 4698 4699 PetscFunctionBegin; 4700 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4701 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4702 4703 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4704 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4705 } 4706 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4707 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4708 4709 gen_to = (VecScatter_MPI_General*)ctx->todata; 4710 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4711 rvalues = gen_from->values; /* holds the length of receiving row */ 4712 svalues = gen_to->values; /* holds the length of sending row */ 4713 nrecvs = gen_from->n; 4714 nsends = gen_to->n; 4715 4716 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4717 srow = gen_to->indices; /* local row index to be sent */ 4718 sstarts = gen_to->starts; 4719 sprocs = gen_to->procs; 4720 sstatus = gen_to->sstatus; 4721 sbs = gen_to->bs; 4722 rstarts = gen_from->starts; 4723 rprocs = gen_from->procs; 4724 rbs = gen_from->bs; 4725 4726 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4727 if (scall == MAT_INITIAL_MATRIX) { 4728 /* i-array */ 4729 /*---------*/ 4730 /* post receives */ 4731 for (i=0; i<nrecvs; i++) { 4732 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4733 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4734 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4735 } 4736 4737 /* pack the outgoing message */ 4738 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4739 4740 sstartsj[0] = 0; 4741 rstartsj[0] = 0; 4742 len = 0; /* total length of j or a array to be sent */ 4743 k = 0; 4744 for (i=0; i<nsends; i++) { 4745 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4746 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4747 for (j=0; j<nrows; j++) { 4748 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4749 for (l=0; l<sbs; l++) { 4750 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4751 4752 rowlen[j*sbs+l] = ncols; 4753 4754 len += ncols; 4755 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4756 } 4757 k++; 4758 } 4759 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4760 4761 sstartsj[i+1] = len; /* starting point of 
(i+1)-th outgoing msg in bufj and bufa */ 4762 } 4763 /* recvs and sends of i-array are completed */ 4764 i = nrecvs; 4765 while (i--) { 4766 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4767 } 4768 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4769 4770 /* allocate buffers for sending j and a arrays */ 4771 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4772 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4773 4774 /* create i-array of B_oth */ 4775 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4776 4777 b_othi[0] = 0; 4778 len = 0; /* total length of j or a array to be received */ 4779 k = 0; 4780 for (i=0; i<nrecvs; i++) { 4781 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4782 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4783 for (j=0; j<nrows; j++) { 4784 b_othi[k+1] = b_othi[k] + rowlen[j]; 4785 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4786 k++; 4787 } 4788 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4789 } 4790 4791 /* allocate space for j and a arrays of B_oth */ 4792 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4793 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4794 4795 /* j-array */ 4796 /*---------*/ 4797 /* post receives of j-array */ 4798 for (i=0; i<nrecvs; i++) { 4799 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4800 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4801 } 4802 4803 /* pack the outgoing message j-array */ 4804 k = 0; 4805 for (i=0; i<nsends; i++) { 4806 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4807 bufJ = bufj+sstartsj[i]; 4808 for (j=0; j<nrows; j++) { 4809 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4810 for (ll=0; ll<sbs; ll++) { 4811 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4812 for (l=0; l<ncols; l++) { 4813 *bufJ++ = cols[l]; 4814 } 4815 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4816 } 4817 } 4818 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4819 } 4820 4821 /* recvs and sends of j-array are completed */ 4822 i = nrecvs; 4823 while (i--) { 4824 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4825 } 4826 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4827 } else if (scall == MAT_REUSE_MATRIX) { 4828 sstartsj = *startsj_s; 4829 rstartsj = *startsj_r; 4830 bufa = *bufa_ptr; 4831 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4832 b_otha = b_oth->a; 4833 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value"); 4834 4835 /* a-array */ 4836 /*---------*/ 4837 /* post receives of a-array */ 4838 for (i=0; i<nrecvs; i++) { 4839 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4840 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4841 } 4842 4843 /* pack the outgoing message a-array */ 4844 k = 0; 4845 for (i=0; i<nsends; i++) { 4846 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4847 bufA = bufa+sstartsj[i]; 4848 for (j=0; j<nrows; j++) { 4849 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4850 for (ll=0; ll<sbs; ll++) { 4851 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4852 for (l=0; l<ncols; l++) { 4853 *bufA++ = vals[l]; 4854 } 4855 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4856 } 4857 } 4858 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4859 } 4860 /* recvs and sends of a-array are completed */ 4861 i = nrecvs; 4862 while (i--) { 4863 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4864 } 4865 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4866 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4867 4868 if (scall == MAT_INITIAL_MATRIX) { 4869 /* put together the new matrix */ 4870 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4871 4872 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4873 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4874 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4875 b_oth->free_a = PETSC_TRUE; 4876 b_oth->free_ij = PETSC_TRUE; 4877 b_oth->nonew = 0; 4878 4879 ierr = PetscFree(bufj);CHKERRQ(ierr); 4880 if (!startsj_s || !bufa_ptr) { 4881 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4882 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4883 } else { 4884 *startsj_s = sstartsj; 4885 *startsj_r = rstartsj; 4886 *bufa_ptr = bufa; 4887 } 4888 } 4889 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4890 PetscFunctionReturn(0); 4891 } 4892 4893 #undef __FUNCT__ 4894 #define __FUNCT__ "MatGetCommunicationStructs" 4895 /*@C 4896 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4897 4898 Not Collective 4899 4900 Input Parameters: 4901 . A - The matrix in mpiaij format 4902 4903 Output Parameter: 4904 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4905 . 
colmap - A map from global column index to local index into lvec 4906 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4907 4908 Level: developer 4909 4910 @*/ 4911 #if defined(PETSC_USE_CTABLE) 4912 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4913 #else 4914 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4915 #endif 4916 { 4917 Mat_MPIAIJ *a; 4918 4919 PetscFunctionBegin; 4920 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4921 PetscValidPointer(lvec, 2); 4922 PetscValidPointer(colmap, 3); 4923 PetscValidPointer(multScatter, 4); 4924 a = (Mat_MPIAIJ*) A->data; 4925 if (lvec) *lvec = a->lvec; 4926 if (colmap) *colmap = a->colmap; 4927 if (multScatter) *multScatter = a->Mvctx; 4928 PetscFunctionReturn(0); 4929 } 4930 4931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4934 #if defined(PETSC_HAVE_ELEMENTAL) 4935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4936 #endif 4937 4938 #undef __FUNCT__ 4939 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4940 /* 4941 Computes (B'*A')' since computing B*A directly is untenable 4942 4943 n p p 4944 ( ) ( ) ( ) 4945 m ( A ) * n ( B ) = m ( C ) 4946 ( ) ( ) ( ) 4947 4948 */ 4949 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4950 { 4951 PetscErrorCode ierr; 4952 Mat At,Bt,Ct; 4953 4954 PetscFunctionBegin; 4955 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4956 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4957 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4958 ierr = MatDestroy(&At);CHKERRQ(ierr); 4959 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4960 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4961 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4962 PetscFunctionReturn(0); 4963 } 4964 4965 #undef __FUNCT__ 4966 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4967 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4968 { 4969 PetscErrorCode ierr; 4970 PetscInt m=A->rmap->n,n=B->cmap->n; 4971 Mat Cmat; 4972 4973 PetscFunctionBegin; 4974 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4975 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4976 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4977 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4978 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4979 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4980 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4981 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4982 4983 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4984 4985 *C = Cmat; 4986 PetscFunctionReturn(0); 4987 } 4988 4989 /* ----------------------------------------------------------------*/ 4990 #undef __FUNCT__ 4991 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4992 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4993 { 4994 PetscErrorCode ierr; 4995 4996 PetscFunctionBegin; 4997 if (scall == MAT_INITIAL_MATRIX) { 4998 ierr = 
PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4999 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5000 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5001 } 5002 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5003 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5004 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5005 PetscFunctionReturn(0); 5006 } 5007 5008 /*MC 5009 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5010 5011 Options Database Keys: 5012 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5013 5014 Level: beginner 5015 5016 .seealso: MatCreateAIJ() 5017 M*/ 5018 5019 #undef __FUNCT__ 5020 #define __FUNCT__ "MatCreate_MPIAIJ" 5021 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5022 { 5023 Mat_MPIAIJ *b; 5024 PetscErrorCode ierr; 5025 PetscMPIInt size; 5026 5027 PetscFunctionBegin; 5028 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5029 5030 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5031 B->data = (void*)b; 5032 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5033 B->assembled = PETSC_FALSE; 5034 B->insertmode = NOT_SET_VALUES; 5035 b->size = size; 5036 5037 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5038 5039 /* build cache for off array entries formed */ 5040 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5041 5042 b->donotstash = PETSC_FALSE; 5043 b->colmap = 0; 5044 b->garray = 0; 5045 b->roworiented = PETSC_TRUE; 5046 5047 /* stuff used for matrix vector multiply */ 5048 b->lvec = NULL; 5049 b->Mvctx = NULL; 5050 5051 /* stuff for MatGetRow() */ 5052 b->rowindices = 0; 5053 b->rowvalues = 0; 5054 b->getrowactive = PETSC_FALSE; 5055 5056 /* flexible pointer used in CUSP/CUSPARSE classes */ 5057 b->spptr = NULL; 5058 5059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5060 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5067 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5068 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5069 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5070 #if defined(PETSC_HAVE_ELEMENTAL) 5071 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5072 #endif 5073 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5074 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5075 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5076 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5077 PetscFunctionReturn(0); 5078 } 5079 5080 #undef __FUNCT__ 5081 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5082 /*@C 5083 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5084 and "off-diagonal" part of the matrix in CSR format. 5085 5086 Collective on MPI_Comm 5087 5088 Input Parameters: 5089 + comm - MPI communicator 5090 . m - number of local rows (Cannot be PETSC_DECIDE) 5091 . n - This value should be the same as the local size used in creating the 5092 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5093 calculated if N is given) For square matrices n is almost always m. 5094 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5095 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5096 . i - row indices for "diagonal" portion of matrix 5097 . j - column indices 5098 . a - matrix values 5099 . oi - row indices for "off-diagonal" portion of matrix 5100 . oj - column indices 5101 - oa - matrix values 5102 5103 Output Parameter: 5104 . mat - the matrix 5105 5106 Level: advanced 5107 5108 Notes: 5109 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5110 must free the arrays once the matrix has been destroyed and not before. 5111 5112 The i and j indices are 0 based 5113 5114 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5115 5116 This sets local rows and cannot be used to set off-processor values. 5117 5118 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5119 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5120 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5121 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5122 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5123 communication if it is known that only local entries will be set. 
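   Example usage (a minimal sketch added for illustration, not taken from the PETSc examples; the 4x4 matrix,
   its values, and the two-process layout are assumptions made only for this snippet). Each of two processes
   owns 2 rows of a 4x4 matrix; the "diagonal" block is 2*I given with local column indices, and each row has
   a single "off-diagonal" entry of -1 given with global column indices:

       PetscInt    i[]   = {0,1,2},  j[]   = {0,1};
       PetscScalar a[]   = {2.0,2.0};
       PetscInt    oi[]  = {0,1,2};
       PetscInt    oj0[] = {2,3},    oj1[] = {0,1};
       PetscScalar oa[]  = {-1.0,-1.0};
       PetscMPIInt rank;
       Mat         A;

       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,rank ? oj1 : oj0,oa,&A);CHKERRQ(ierr);
       ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = MatDestroy(&A);CHKERRQ(ierr);

   Because the arrays above are stack variables that outlive the matrix within the enclosing scope, no explicit
   free is needed; heap-allocated arrays must be freed by the caller only after the matrix has been destroyed.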
5124 5125 .keywords: matrix, aij, compressed row, sparse, parallel 5126 5127 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5128 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5129 @*/ 5130 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5131 { 5132 PetscErrorCode ierr; 5133 Mat_MPIAIJ *maij; 5134 5135 PetscFunctionBegin; 5136 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5137 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5138 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5139 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5140 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5141 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5142 maij = (Mat_MPIAIJ*) (*mat)->data; 5143 5144 (*mat)->preallocated = PETSC_TRUE; 5145 5146 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5147 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5148 5149 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5150 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5151 5152 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5153 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5154 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5155 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5156 5157 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5158 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5159 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5160 PetscFunctionReturn(0); 5161 } 5162 5163 /* 5164 Special version for direct calls from Fortran 5165 */ 5166 #include <petsc/private/fortranimpl.h> 5167 5168 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5169 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5170 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5171 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5172 #endif 5173 5174 /* Change these macros so can be used in void function */ 5175 #undef CHKERRQ 5176 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5177 #undef SETERRQ2 5178 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5179 #undef SETERRQ3 5180 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5181 #undef SETERRQ 5182 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5183 5184 #undef __FUNCT__ 5185 #define __FUNCT__ "matsetvaluesmpiaij_" 5186 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5187 { 5188 Mat mat = *mmat; 5189 PetscInt m = *mm, n = *mn; 5190 InsertMode addv = *maddv; 5191 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5192 PetscScalar value; 5193 PetscErrorCode ierr; 5194 5195 MatCheckPreallocated(mat,1); 5196 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5197 5198 #if defined(PETSC_USE_DEBUG) 5199 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5200 #endif 5201 { 5202 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5203 
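  /* The loop below mirrors MatSetValues_MPIAIJ(): entries whose global row lies in the local range
     [rstart,rend) are inserted directly, either into the "diagonal" block aij->A (global column in
     [cstart,cend)) or into the "off-diagonal" block aij->B, using the MatSetValues_SeqAIJ_A_Private()
     and MatSetValues_SeqAIJ_B_Private() macros; entries for rows owned by other processes are placed
     in the stash for communication during assembly, unless donotstash is set. */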
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5204 PetscBool roworiented = aij->roworiented; 5205 5206 /* Some Variables required in the macro */ 5207 Mat A = aij->A; 5208 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5209 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5210 MatScalar *aa = a->a; 5211 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5212 Mat B = aij->B; 5213 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5214 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5215 MatScalar *ba = b->a; 5216 5217 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5218 PetscInt nonew = a->nonew; 5219 MatScalar *ap1,*ap2; 5220 5221 PetscFunctionBegin; 5222 for (i=0; i<m; i++) { 5223 if (im[i] < 0) continue; 5224 #if defined(PETSC_USE_DEBUG) 5225 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5226 #endif 5227 if (im[i] >= rstart && im[i] < rend) { 5228 row = im[i] - rstart; 5229 lastcol1 = -1; 5230 rp1 = aj + ai[row]; 5231 ap1 = aa + ai[row]; 5232 rmax1 = aimax[row]; 5233 nrow1 = ailen[row]; 5234 low1 = 0; 5235 high1 = nrow1; 5236 lastcol2 = -1; 5237 rp2 = bj + bi[row]; 5238 ap2 = ba + bi[row]; 5239 rmax2 = bimax[row]; 5240 nrow2 = bilen[row]; 5241 low2 = 0; 5242 high2 = nrow2; 5243 5244 for (j=0; j<n; j++) { 5245 if (roworiented) value = v[i*n+j]; 5246 else value = v[i+j*m]; 5247 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5248 if (in[j] >= cstart && in[j] < cend) { 5249 col = in[j] - cstart; 5250 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5251 } else if (in[j] < 0) continue; 5252 #if defined(PETSC_USE_DEBUG) 5253 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5254 #endif 5255 else { 5256 if (mat->was_assembled) { 5257 if (!aij->colmap) { 5258 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5259 } 5260 #if defined(PETSC_USE_CTABLE) 5261 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5262 col--; 5263 #else 5264 col = aij->colmap[in[j]] - 1; 5265 #endif 5266 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5267 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5268 col = in[j]; 5269 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5270 B = aij->B; 5271 b = (Mat_SeqAIJ*)B->data; 5272 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5273 rp2 = bj + bi[row]; 5274 ap2 = ba + bi[row]; 5275 rmax2 = bimax[row]; 5276 nrow2 = bilen[row]; 5277 low2 = 0; 5278 high2 = nrow2; 5279 bm = aij->B->rmap->n; 5280 ba = b->a; 5281 } 5282 } else col = in[j]; 5283 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5284 } 5285 } 5286 } else if (!aij->donotstash) { 5287 if (roworiented) { 5288 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5289 } else { 5290 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5291 } 5292 } 5293 } 5294 } 5295 PetscFunctionReturnVoid(); 5296 } 5297 5298
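/*
   Usage sketch (illustrative only, not taken from the PETSc sources): the stub above is the direct-call
   fast path used when Fortran code calls MatSetValues() on a MATMPIAIJ matrix. For an already created and
   preallocated MATMPIAIJ matrix A, the equivalent call sequence on the C side is

       PetscInt    row = 0, col = 0;
       PetscScalar v   = 1.0;
       ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/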