#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL.
   The type also automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
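/*
   Illustrative usage sketch for the MATAIJ documentation above (not part of this
   implementation; the matrix sizes and nonzero estimates are chosen arbitrarily
   for illustration). It shows the recommended pattern of calling both
   preallocation routines so the same code works on one process and in parallel:

     Mat C;
     ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr);
     ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(C,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(C,5,NULL);CHKERRQ(ierr);         used on a single process
     ierr = MatMPIAIJSetPreallocation(C,5,NULL,2,NULL);CHKERRQ(ierr);  used on multiple processes
     ... set entries with MatSetValues() ...
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/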
{ 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 
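  /*
     The rows that carry entries outside the block diagonal are the union of
     (a) the off-block-diagonal entries found in the local diagonal block (sis) and
     (b) the nonzero rows of the off-diagonal block B (gis).  The two index sets
     are concatenated below, sorted with duplicates removed, and shifted by the
     row ownership start so the resulting IS uses global row numbers.
  */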
206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 217 218 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 220 ierr = ISDestroy(&sis);CHKERRQ(ierr); 221 ierr = ISDestroy(&gis);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatDistribute_MPIAIJ" 227 /* 228 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 229 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 230 231 Only for square matrices 232 233 Used by a preconditioner, hence PETSC_EXTERN 234 */ 235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 236 { 237 PetscMPIInt rank,size; 238 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 239 PetscErrorCode ierr; 240 Mat mat; 241 Mat_SeqAIJ *gmata; 242 PetscMPIInt tag; 243 MPI_Status status; 244 PetscBool aij; 245 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 250 if (!rank) { 251 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 252 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 253 } 254 if (reuse == MAT_INITIAL_MATRIX) { 255 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 256 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 257 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 258 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 259 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 260 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 261 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 262 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 263 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 264 265 rowners[0] = 0; 266 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 267 rstart = rowners[rank]; 268 rend = rowners[rank+1]; 269 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 270 if (!rank) { 271 gmata = (Mat_SeqAIJ*) gmat->data; 272 /* send row lengths to all processors */ 273 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 274 for (i=1; i<size; i++) { 275 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276 } 277 /* determine number diagonal and off-diagonal counts */ 278 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 279 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 280 jj = 0; 281 for (i=0; i<m; i++) { 282 for (j=0; j<dlens[i]; j++) { 283 if (gmata->j[jj] < rstart) ld[i]++; 284 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 285 jj++; 286 } 287 } 288 /* send column indices to other processes */ 289 for (i=1; i<size; i++) { 290 nz = 
gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 291 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 292 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 } 294 295 /* send numerical values to other processes */ 296 for (i=1; i<size; i++) { 297 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 298 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 299 } 300 gmataa = gmata->a; 301 gmataj = gmata->j; 302 303 } else { 304 /* receive row lengths */ 305 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 306 /* receive column indices */ 307 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 309 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* determine number diagonal and off-diagonal counts */ 311 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 313 jj = 0; 314 for (i=0; i<m; i++) { 315 for (j=0; j<dlens[i]; j++) { 316 if (gmataj[jj] < rstart) ld[i]++; 317 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 318 jj++; 319 } 320 } 321 /* receive numerical values */ 322 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* set preallocation */ 326 for (i=0; i<m; i++) { 327 dlens[i] -= olens[i]; 328 } 329 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 330 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 dlens[i] += olens[i]; 334 } 335 cnt = 0; 336 for (i=0; i<m; i++) { 337 row = rstart + i; 338 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 339 cnt += dlens[i]; 340 } 341 if (rank) { 342 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 343 } 344 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 345 ierr = PetscFree(rowners);CHKERRQ(ierr); 346 347 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 348 349 *inmat = mat; 350 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 351 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 352 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 353 mat = *inmat; 354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 355 if (!rank) { 356 /* send numerical values to other processes */ 357 gmata = (Mat_SeqAIJ*) gmat->data; 358 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 359 gmataa = gmata->a; 360 for (i=1; i<size; i++) { 361 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 362 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 363 } 364 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 365 } else { 366 /* receive numerical values from process 0*/ 367 nz = Ad->nz + Ao->nz; 368 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 369 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 370 } 371 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 372 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 373 ad = Ad->a; 374 ao = Ao->a; 375 if (mat->rmap->n) { 376 i = 0; 377 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 378 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; 
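      /*
         ld[i] counts the entries of local row i whose global column lies to the
         left of the diagonal block, so each incoming row is ordered as
         (left part of B | A part | right part of B).  The copies here and in the
         loop below peel these segments off in that order; the right part of one
         row and the left part of the next are contiguous and moved together.
      */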
gmataa += nz; 379 } 380 for (i=1; i<mat->rmap->n; i++) { 381 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 i--; 385 if (mat->rmap->n) { 386 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 387 } 388 if (rank) { 389 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 390 } 391 } 392 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 393 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 /* 398 Local utility routine that creates a mapping from the global column 399 number to the local number in the off-diagonal part of the local 400 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 401 a slightly higher hash table cost; without it it is not scalable (each processor 402 has an order N integer array but is fast to acess. 403 */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; \ 454 } \ 455 rp1[_i] = col; \ 456 ap1[_i] = value; \ 457 A->nonzerostate++;\ 458 a_noinsert: ; \ 459 ailen[row] = nrow1; \ 460 } 461 462 463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 464 { \ 465 if (col <= lastcol2) low2 = 0; \ 466 else high2 = nrow2; \ 467 lastcol2 = col; \ 468 while (high2-low2 > 5) { \ 469 t = (low2+high2)/2; \ 470 if (rp2[t] > col) high2 = t; \ 471 else low2 = t; \ 472 } \ 473 for (_i=low2; _i<high2; _i++) { \ 
474 if (rp2[_i] > col) break; \ 475 if (rp2[_i] == col) { \ 476 if (addv == ADD_VALUES) ap2[_i] += value; \ 477 else ap2[_i] = value; \ 478 goto b_noinsert; \ 479 } \ 480 } \ 481 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 484 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 485 N = nrow2++ - 1; b->nz++; high2++; \ 486 /* shift up all the later entries in this row */ \ 487 for (ii=N; ii>=_i; ii--) { \ 488 rp2[ii+1] = rp2[ii]; \ 489 ap2[ii+1] = ap2[ii]; \ 490 } \ 491 rp2[_i] = col; \ 492 ap2[_i] = value; \ 493 B->nonzerostate++; \ 494 b_noinsert: ; \ 495 bilen[row] = nrow2; \ 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if (garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSetValues_MPIAIJ" 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 564 nrow1 = ailen[row]; 565 low1 = 0; 566 
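      /*
         low1/high1 and low2/high2 bracket the binary-search window used by the
         MatSetValues_SeqAIJ_A_Private and MatSetValues_SeqAIJ_B_Private macros
         above; lastcol1/lastcol2 record the column most recently set so that,
         when columns arrive in increasing order, the lower bound left by the
         previous search is reused instead of rescanning the row from its start.
      */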
high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (roworiented) value = v[i*n+j]; 577 else value = v[i+j*m]; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } else col = in[j]; 615 nonew = b->nonew; 616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!aij->donotstash) { 622 mat->assembled = PETSC_FALSE; 623 if (roworiented) { 624 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } else { 626 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } 628 } 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatGetValues_MPIAIJ" 636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 642 643 PetscFunctionBegin; 644 for (i=0; i<m; i++) { 645 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 646 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 647 if (idxm[i] >= rstart && idxm[i] < rend) { 648 row = idxm[i] - rstart; 649 for (j=0; j<n; j++) { 650 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 651 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 652 if (idxn[j] >= cstart && idxn[j] < cend) 
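        /*
           Columns in [cstart,cend) are owned locally and are read from the
           diagonal block aij->A; any other column must be translated through
           colmap (and checked against garray) to the local column numbering of
           the off-diagonal block aij->B in the else branch below.
        */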
{ 653 col = idxn[j] - cstart; 654 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } else { 656 if (!aij->colmap) { 657 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 658 } 659 #if defined(PETSC_USE_CTABLE) 660 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 661 col--; 662 #else 663 col = aij->colmap[idxn[j]] - 1; 664 #endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. 
*/ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *owners = A->rmap->range; 787 PetscInt n = A->rmap->n; 788 PetscSF sf; 789 PetscInt *lrows; 790 PetscSFNode *rrows; 791 PetscInt r, p = 0, len = 0; 792 PetscErrorCode ierr; 793 794 PetscFunctionBegin; 795 /* Create SF where leaves are input rows and roots are owned rows */ 796 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 797 for (r = 0; r < n; ++r) lrows[r] = -1; 798 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 799 for (r = 0; r < N; ++r) { 800 const PetscInt idx = rows[r]; 801 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 802 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 803 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 804 } 805 if (A->nooffproczerorows) { 806 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 807 lrows[len++] = idx - owners[p]; 808 } else { 809 rrows[r].rank = p; 810 rrows[r].index = rows[r] - owners[p]; 811 } 812 } 813 if (!A->nooffproczerorows) { 814 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 815 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 816 /* Collect flags for rows to be zeroed */ 817 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 818 ierr = 
PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 819 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 820 /* Compress and put in row numbers */ 821 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 822 } 823 /* fix right hand side if needed */ 824 if (x && b) { 825 const PetscScalar *xx; 826 PetscScalar *bb; 827 828 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 829 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 830 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 831 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 833 } 834 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 835 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 836 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 837 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 838 } else if (diag != 0.0) { 839 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 840 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 841 for (r = 0; r < len; ++r) { 842 const PetscInt row = lrows[r] + A->rmap->rstart; 843 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 844 } 845 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 846 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 847 } else { 848 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 849 } 850 ierr = PetscFree(lrows);CHKERRQ(ierr); 851 852 /* only change matrix nonzero state if pattern was allowed to be changed */ 853 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 854 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 855 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 856 } 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 862 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 863 { 864 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 865 PetscErrorCode ierr; 866 PetscMPIInt n = A->rmap->n; 867 PetscInt i,j,r,m,p = 0,len = 0; 868 PetscInt *lrows,*owners = A->rmap->range; 869 PetscSFNode *rrows; 870 PetscSF sf; 871 const PetscScalar *xx; 872 PetscScalar *bb,*mask; 873 Vec xmask,lmask; 874 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 875 const PetscInt *aj, *ii,*ridx; 876 PetscScalar *aa; 877 878 PetscFunctionBegin; 879 /* Create SF where leaves are input rows and roots are owned rows */ 880 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 881 for (r = 0; r < n; ++r) lrows[r] = -1; 882 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 883 for (r = 0; r < N; ++r) { 884 const PetscInt idx = rows[r]; 885 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 886 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 887 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 888 } 889 rrows[r].rank = p; 890 rrows[r].index = rows[r] - owners[p]; 891 } 892 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 893 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, 
PETSC_OWN_POINTER);CHKERRQ(ierr); 894 /* Collect flags for rows to be zeroed */ 895 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 896 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 897 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 898 /* Compress and put in row numbers */ 899 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 900 /* zero diagonal part of matrix */ 901 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 902 /* handle off diagonal part of matrix */ 903 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 904 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 905 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 906 for (i=0; i<len; i++) bb[lrows[i]] = 1; 907 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 908 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 910 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 911 if (x) { 912 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 915 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 916 } 917 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 918 /* remove zeroed rows of off diagonal matrix */ 919 ii = aij->i; 920 for (i=0; i<len; i++) { 921 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 922 } 923 /* loop over all elements of off process part of matrix zeroing removed columns*/ 924 if (aij->compressedrow.use) { 925 m = aij->compressedrow.nrows; 926 ii = aij->compressedrow.i; 927 ridx = aij->compressedrow.rindex; 928 for (i=0; i<m; i++) { 929 n = ii[i+1] - ii[i]; 930 aj = aij->j + ii[i]; 931 aa = aij->a + ii[i]; 932 933 for (j=0; j<n; j++) { 934 if (PetscAbsScalar(mask[*aj])) { 935 if (b) bb[*ridx] -= *aa*xx[*aj]; 936 *aa = 0.0; 937 } 938 aa++; 939 aj++; 940 } 941 ridx++; 942 } 943 } else { /* do not use compressed row format */ 944 m = l->B->rmap->n; 945 for (i=0; i<m; i++) { 946 n = ii[i+1] - ii[i]; 947 aj = aij->j + ii[i]; 948 aa = aij->a + ii[i]; 949 for (j=0; j<n; j++) { 950 if (PetscAbsScalar(mask[*aj])) { 951 if (b) bb[i] -= *aa*xx[*aj]; 952 *aa = 0.0; 953 } 954 aa++; 955 aj++; 956 } 957 } 958 } 959 if (x) { 960 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 961 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 962 } 963 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 964 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 965 ierr = PetscFree(lrows);CHKERRQ(ierr); 966 967 /* only change matrix nonzero state if pattern was allowed to be changed */ 968 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 969 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 970 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 971 } 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatMult_MPIAIJ" 977 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 978 { 979 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 980 PetscErrorCode ierr; 981 PetscInt nt; 982 983 PetscFunctionBegin; 984 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 985 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 986 ierr = 
VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 988 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 996 { 997 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 998 PetscErrorCode ierr; 999 1000 PetscFunctionBegin; 1001 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1007 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1008 { 1009 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1010 PetscErrorCode ierr; 1011 1012 PetscFunctionBegin; 1013 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1015 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1016 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1023 { 1024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1025 PetscErrorCode ierr; 1026 PetscBool merged; 1027 1028 PetscFunctionBegin; 1029 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1030 /* do nondiagonal part */ 1031 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1032 if (!merged) { 1033 /* send it on its way */ 1034 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1035 /* do local part */ 1036 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1037 /* receive remote parts: note this assumes the values are not actually */ 1038 /* added in yy until the next line, */ 1039 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1040 } else { 1041 /* do local part */ 1042 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1043 /* send it on its way */ 1044 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1045 /* values actually were received in the Begin() but we need to call this nop */ 1046 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1053 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1054 { 1055 MPI_Comm comm; 1056 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1057 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1058 IS Me,Notme; 1059 PetscErrorCode ierr; 1060 PetscInt M,N,first,last,*notme,i; 1061 PetscMPIInt size; 1062 1063 PetscFunctionBegin; 1064 /* Easy test: symmetric diagonal block */ 1065 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1066 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1067 if (!*f) PetscFunctionReturn(0); 1068 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1069 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1070 if (size == 1) PetscFunctionReturn(0); 1071 1072 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1073 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1074 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1075 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1076 for (i=0; i<first; i++) notme[i] = i; 1077 for (i=last; i<M; i++) notme[i-last+first] = i; 1078 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1079 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1080 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1081 Aoff = Aoffs[0]; 1082 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1083 Boff = Boffs[0]; 1084 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1085 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1086 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1087 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1088 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1089 ierr = PetscFree(notme);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1095 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1096 { 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 1100 PetscFunctionBegin; 1101 /* do nondiagonal part */ 1102 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1103 /* send it on its way */ 1104 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1105 /* do local part */ 1106 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1107 /* receive remote parts */ 1108 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 /* 1113 This only works correctly for square matrices where the subblock A->A is the 1114 diagonal block 1115 */ 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1119 { 1120 PetscErrorCode ierr; 1121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1122 1123 PetscFunctionBegin; 1124 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1125 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1126 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatScale_MPIAIJ" 1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1139 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "MatDestroy_MPIAIJ" 1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1146 { 1147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 #if defined(PETSC_USE_LOG) 1152 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1153 #endif 1154 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1155 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1156 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1157 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1158 #if defined(PETSC_USE_CTABLE) 1159 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1160 #else 1161 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1162 #endif 1163 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1164 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1165 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1166 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1167 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1168 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1169 1170 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1175 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1179 #if defined(PETSC_HAVE_ELEMENTAL) 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1181 #endif 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1188 { 1189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1190 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1191 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1192 PetscErrorCode ierr; 1193 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1194 int fd; 1195 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1196 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1197 PetscScalar *column_values; 1198 PetscInt message_count,flowcontrolcount; 1199 FILE *file; 1200 1201 PetscFunctionBegin; 1202 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1203 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1204 nz = A->nz + B->nz; 1205 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1206 if (!rank) { 1207 header[0] = MAT_FILE_CLASSID; 1208 header[1] = mat->rmap->N; 1209 header[2] = mat->cmap->N; 1210 1211 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1213 /* get largest number of rows any processor has */ 1214 rlen = mat->rmap->n; 1215 range = mat->rmap->range; 1216 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1217 } else { 1218 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 rlen = mat->rmap->n; 1220 } 1221 1222 /* load up the local row counts */ 1223 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1224 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1225 1226 /* store the row lengths to the file */ 1227 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1228 if (!rank) { 1229 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1230 for (i=1; i<size; i++) { 1231 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1232 rlen = range[i+1] - range[i]; 1233 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1234 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1235 } 1236 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1237 } else { 1238 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1239 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1240 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1241 } 1242 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1243 1244 /* load up the local column indices */ 1245 nzmax = nz; /* th processor needs space a largest processor needs */ 1246 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1247 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1248 cnt = 0; 1249 for (i=0; i<mat->rmap->n; i++) { 1250 for (j=B->i[i]; j<B->i[i+1]; j++) { 1251 if ((col = garray[B->j[j]]) > cstart) break; 1252 column_indices[cnt++] = col; 1253 } 1254 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1255 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1256 } 1257 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1258 1259 /* store the column indices to the file */ 1260 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1261 if (!rank) { 1262 MPI_Status status; 1263 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 for (i=1; i<size; i++) { 1265 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1266 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1267 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1268 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1269 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1270 } 1271 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1272 } else { 1273 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1274 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1275 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1276 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1277 } 1278 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1279 1280 /* load up the local column values */ 1281 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1282 cnt = 0; 1283 for (i=0; i<mat->rmap->n; i++) { 1284 for (j=B->i[i]; j<B->i[i+1]; j++) { 1285 if (garray[B->j[j]] > cstart) break; 1286 column_values[cnt++] = B->a[j]; 1287 } 1288 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1289 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1290 } 1291 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1292 1293 /* store the column values to the file */ 1294 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1295 if (!rank) { 1296 MPI_Status status; 1297 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1298 for (i=1; i<size; i++) { 1299 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1300 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1301 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1302 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1303 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1304 } 1305 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1306 } else { 1307 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1308 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1309 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1310 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1311 } 1312 ierr = PetscFree(column_values);CHKERRQ(ierr); 1313 1314 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1315 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #include <petscdraw.h> 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1323 { 1324 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1325 PetscErrorCode ierr; 1326 PetscMPIInt rank = aij->rank,size = aij->size; 1327 PetscBool isdraw,iascii,isbinary; 1328 PetscViewer sviewer; 1329 PetscViewerFormat format; 1330 1331 PetscFunctionBegin; 1332 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1333 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1334 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1335 if (iascii) { 1336 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1337 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1338 MatInfo info; 1339 PetscBool inodes; 1340 1341 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1342 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1343 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1344 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1345 if (!inodes) { 1346 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1347 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1348 } else { 1349 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1350 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1351 } 1352 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1353 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1354 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1355 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1356 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1357 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1358 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1359 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1362 PetscInt inodecount,inodelimit,*inodes; 1363 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1364 if (inodes) { 1365 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1366 } else { 1367 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1368 } 1369 PetscFunctionReturn(0); 1370 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1371 PetscFunctionReturn(0); 1372 } 1373 } else if (isbinary) { 1374 if (size == 1) { 1375 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1376 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1377 } else { 1378 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } else if (isdraw) { 1382 PetscDraw draw; 1383 PetscBool isnull; 1384 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1385 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1386 } 1387 1388 { 1389 /* assemble the entire matrix onto first processor. */ 1390 Mat A; 1391 Mat_SeqAIJ *Aloc; 1392 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1393 MatScalar *a; 1394 1395 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1396 if (!rank) { 1397 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1398 } else { 1399 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1400 } 1401 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1402 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1403 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1404 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1405 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1406 1407 /* copy over the A part */ 1408 Aloc = (Mat_SeqAIJ*)aij->A->data; 1409 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1410 row = mat->rmap->rstart; 1411 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1412 for (i=0; i<m; i++) { 1413 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1414 row++; 1415 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1416 } 1417 aj = Aloc->j; 1418 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1419 1420 /* copy over the B part */ 1421 Aloc = (Mat_SeqAIJ*)aij->B->data; 1422 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1423 row = mat->rmap->rstart; 1424 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1425 ct = cols; 1426 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1427 for (i=0; i<m; i++) { 1428 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1429 row++; 1430 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1431 } 1432 ierr = PetscFree(ct);CHKERRQ(ierr); 1433 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1434 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1435 /* 1436 Everyone 
has to call to draw the matrix since the graphics waits are 1437 synchronized across all processors that share the PetscDraw object 1438 */ 1439 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1440 if (!rank) { 1441 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1442 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1443 } 1444 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1445 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1446 ierr = MatDestroy(&A);CHKERRQ(ierr); 1447 } 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatView_MPIAIJ" 1453 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1454 { 1455 PetscErrorCode ierr; 1456 PetscBool iascii,isdraw,issocket,isbinary; 1457 1458 PetscFunctionBegin; 1459 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1463 if (iascii || isdraw || isbinary || issocket) { 1464 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatSOR_MPIAIJ" 1471 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1472 { 1473 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1474 PetscErrorCode ierr; 1475 Vec bb1 = 0; 1476 PetscBool hasop; 1477 1478 PetscFunctionBegin; 1479 if (flag == SOR_APPLY_UPPER) { 1480 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } 1483 1484 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1485 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1486 } 1487 1488 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1489 if (flag & SOR_ZERO_INITIAL_GUESS) { 1490 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1491 its--; 1492 } 1493 1494 while (its--) { 1495 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 1498 /* update rhs: bb1 = bb - B*x */ 1499 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1500 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1501 1502 /* local sweep */ 1503 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1504 } 1505 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1506 if (flag & SOR_ZERO_INITIAL_GUESS) { 1507 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1508 its--; 1509 } 1510 while (its--) { 1511 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1512 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1513 1514 /* update rhs: bb1 = bb - B*x */ 1515 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1516 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1517 1518 /* local sweep */ 1519 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1520 } 1521 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1522 if (flag & SOR_ZERO_INITIAL_GUESS) { 1523 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1524 its--; 1525 } 1526 while (its--) { 1527 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1528 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1529 1530 /* update rhs: bb1 = bb - B*x */ 1531 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1532 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1533 1534 /* local sweep */ 1535 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1536 } 1537 } else if (flag & SOR_EISENSTAT) { 1538 Vec xx1; 1539 1540 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1542 1543 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1544 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 if (!mat->diag) { 1546 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1547 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1548 } 1549 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1550 if (hasop) { 1551 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1552 } else { 1553 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1554 } 1555 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1556 1557 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1558 1559 /* local sweep */ 1560 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1561 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1562 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1563 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1564 1565 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1566 PetscFunctionReturn(0); 1567 } 1568 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatPermute_MPIAIJ" 1571 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1572 { 1573 Mat aA,aB,Aperm; 1574 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1575 PetscScalar *aa,*ba; 1576 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1577 PetscSF rowsf,sf; 1578 IS parcolp = NULL; 1579 PetscBool done; 1580 PetscErrorCode ierr; 1581 1582 PetscFunctionBegin; 1583 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1584 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1585 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1586 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1587 1588 /* Invert row permutation to find out where my rows should go */ 1589 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1590 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1591 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1593 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1594 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1595 1596 /* Invert column permutation to find out where my 
columns should go */ 1597 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1598 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1599 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1600 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1601 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1602 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1603 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1604 1605 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1606 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1607 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1608 1609 /* Find out where my gcols should go */ 1610 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1611 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1612 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1613 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1614 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1615 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1616 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1617 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1618 1619 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1620 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1621 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1622 for (i=0; i<m; i++) { 1623 PetscInt row = rdest[i],rowner; 1624 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1625 for (j=ai[i]; j<ai[i+1]; j++) { 1626 PetscInt cowner,col = cdest[aj[j]]; 1627 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1628 if (rowner == cowner) dnnz[i]++; 1629 else onnz[i]++; 1630 } 1631 for (j=bi[i]; j<bi[i+1]; j++) { 1632 PetscInt cowner,col = gcdest[bj[j]]; 1633 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1634 if (rowner == cowner) dnnz[i]++; 1635 else onnz[i]++; 1636 } 1637 } 1638 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1639 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1640 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1641 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1642 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1643 1644 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1645 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1646 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1647 for (i=0; i<m; i++) { 1648 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1649 PetscInt j0,rowlen; 1650 rowlen = ai[i+1] - ai[i]; 1651 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1652 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1653 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1654 } 1655 rowlen = bi[i+1] - bi[i]; 1656 for (j0=j=0; j<rowlen; j0=j) { 1657 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1658 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1659 } 1660 } 1661 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1662 ierr = 
MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1663 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1664 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1665 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1666 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1667 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1668 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1669 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1670 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1671 *B = Aperm; 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1677 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1678 { 1679 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1680 PetscErrorCode ierr; 1681 1682 PetscFunctionBegin; 1683 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1684 if (ghosts) *ghosts = aij->garray; 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1690 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1691 { 1692 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1693 Mat A = mat->A,B = mat->B; 1694 PetscErrorCode ierr; 1695 PetscReal isend[5],irecv[5]; 1696 1697 PetscFunctionBegin; 1698 info->block_size = 1.0; 1699 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1700 1701 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1702 isend[3] = info->memory; isend[4] = info->mallocs; 1703 1704 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1705 1706 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1707 isend[3] += info->memory; isend[4] += info->mallocs; 1708 if (flag == MAT_LOCAL) { 1709 info->nz_used = isend[0]; 1710 info->nz_allocated = isend[1]; 1711 info->nz_unneeded = isend[2]; 1712 info->memory = isend[3]; 1713 info->mallocs = isend[4]; 1714 } else if (flag == MAT_GLOBAL_MAX) { 1715 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1716 1717 info->nz_used = irecv[0]; 1718 info->nz_allocated = irecv[1]; 1719 info->nz_unneeded = irecv[2]; 1720 info->memory = irecv[3]; 1721 info->mallocs = irecv[4]; 1722 } else if (flag == MAT_GLOBAL_SUM) { 1723 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1724 1725 info->nz_used = irecv[0]; 1726 info->nz_allocated = irecv[1]; 1727 info->nz_unneeded = irecv[2]; 1728 info->memory = irecv[3]; 1729 info->mallocs = irecv[4]; 1730 } 1731 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1732 info->fill_ratio_needed = 0; 1733 info->factor_mallocs = 0; 1734 PetscFunctionReturn(0); 1735 } 1736 1737 #undef __FUNCT__ 1738 #define __FUNCT__ "MatSetOption_MPIAIJ" 1739 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1740 { 1741 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1742 PetscErrorCode ierr; 1743 1744 PetscFunctionBegin; 1745 switch (op) { 1746 case MAT_NEW_NONZERO_LOCATIONS: 1747 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1748 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1749 case MAT_KEEP_NONZERO_PATTERN: 1750 case MAT_NEW_NONZERO_LOCATION_ERR: 1751 case MAT_USE_INODES: 1752 case MAT_IGNORE_ZERO_ENTRIES: 1753 MatCheckPreallocated(A,1); 1754 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1755 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1756 break; 1757 case MAT_ROW_ORIENTED: 1758 
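    /* MAT_ROW_ORIENTED controls how the values array passed to MatSetValues() is interpreted
       (laid out by rows when PETSC_TRUE, by columns when PETSC_FALSE); record the flag and
       forward it to both the diagonal and off-diagonal blocks so their insertion routines agree. */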
a->roworiented = flg; 1759 1760 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1761 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1762 break; 1763 case MAT_NEW_DIAGONALS: 1764 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1765 break; 1766 case MAT_IGNORE_OFF_PROC_ENTRIES: 1767 a->donotstash = flg; 1768 break; 1769 case MAT_SPD: 1770 A->spd_set = PETSC_TRUE; 1771 A->spd = flg; 1772 if (flg) { 1773 A->symmetric = PETSC_TRUE; 1774 A->structurally_symmetric = PETSC_TRUE; 1775 A->symmetric_set = PETSC_TRUE; 1776 A->structurally_symmetric_set = PETSC_TRUE; 1777 } 1778 break; 1779 case MAT_SYMMETRIC: 1780 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1781 break; 1782 case MAT_STRUCTURALLY_SYMMETRIC: 1783 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1784 break; 1785 case MAT_HERMITIAN: 1786 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1787 break; 1788 case MAT_SYMMETRY_ETERNAL: 1789 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1790 break; 1791 default: 1792 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1793 } 1794 PetscFunctionReturn(0); 1795 } 1796 1797 #undef __FUNCT__ 1798 #define __FUNCT__ "MatGetRow_MPIAIJ" 1799 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1800 { 1801 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1802 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1803 PetscErrorCode ierr; 1804 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1805 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1806 PetscInt *cmap,*idx_p; 1807 1808 PetscFunctionBegin; 1809 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1810 mat->getrowactive = PETSC_TRUE; 1811 1812 if (!mat->rowvalues && (idx || v)) { 1813 /* 1814 allocate enough space to hold information from the longest row. 
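       The work buffers mat->rowvalues and mat->rowindices are allocated only once, on the first
       call that requests indices or values, and are then reused by subsequent MatGetRow() calls.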
1815 */ 1816 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1817 PetscInt max = 1,tmp; 1818 for (i=0; i<matin->rmap->n; i++) { 1819 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1820 if (max < tmp) max = tmp; 1821 } 1822 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1823 } 1824 1825 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1826 lrow = row - rstart; 1827 1828 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1829 if (!v) {pvA = 0; pvB = 0;} 1830 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1831 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1832 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1833 nztot = nzA + nzB; 1834 1835 cmap = mat->garray; 1836 if (v || idx) { 1837 if (nztot) { 1838 /* Sort by increasing column numbers, assuming A and B already sorted */ 1839 PetscInt imark = -1; 1840 if (v) { 1841 *v = v_p = mat->rowvalues; 1842 for (i=0; i<nzB; i++) { 1843 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1844 else break; 1845 } 1846 imark = i; 1847 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1848 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1849 } 1850 if (idx) { 1851 *idx = idx_p = mat->rowindices; 1852 if (imark > -1) { 1853 for (i=0; i<imark; i++) { 1854 idx_p[i] = cmap[cworkB[i]]; 1855 } 1856 } else { 1857 for (i=0; i<nzB; i++) { 1858 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1859 else break; 1860 } 1861 imark = i; 1862 } 1863 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1864 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1865 } 1866 } else { 1867 if (idx) *idx = 0; 1868 if (v) *v = 0; 1869 } 1870 } 1871 *nz = nztot; 1872 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1873 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1879 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1880 { 1881 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1882 1883 PetscFunctionBegin; 1884 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1885 aij->getrowactive = PETSC_FALSE; 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatNorm_MPIAIJ" 1891 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1892 { 1893 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1894 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1895 PetscErrorCode ierr; 1896 PetscInt i,j,cstart = mat->cmap->rstart; 1897 PetscReal sum = 0.0; 1898 MatScalar *v; 1899 1900 PetscFunctionBegin; 1901 if (aij->size == 1) { 1902 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1903 } else { 1904 if (type == NORM_FROBENIUS) { 1905 v = amat->a; 1906 for (i=0; i<amat->nz; i++) { 1907 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1908 } 1909 v = bmat->a; 1910 for (i=0; i<bmat->nz; i++) { 1911 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1912 } 1913 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1914 *norm = PetscSqrtReal(*norm); 1915 } else if (type == NORM_1) { /* max column norm */ 1916 PetscReal *tmp,*tmp2; 1917 PetscInt *jj,*garray = aij->garray; 1918 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1919 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1920 *norm = 0.0; 1921 v = amat->a; jj = amat->j; 1922 for (j=0; j<amat->nz; j++) { 1923 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1924 } 1925 v = bmat->a; jj = bmat->j; 1926 for (j=0; j<bmat->nz; j++) { 1927 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1928 } 1929 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1930 for (j=0; j<mat->cmap->N; j++) { 1931 if (tmp2[j] > *norm) *norm = tmp2[j]; 1932 } 1933 ierr = PetscFree(tmp);CHKERRQ(ierr); 1934 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1935 } else if (type == NORM_INFINITY) { /* max row norm */ 1936 PetscReal ntemp = 0.0; 1937 for (j=0; j<aij->A->rmap->n; j++) { 1938 v = amat->a + amat->i[j]; 1939 sum = 0.0; 1940 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1941 sum += PetscAbsScalar(*v); v++; 1942 } 1943 v = bmat->a + bmat->i[j]; 1944 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1945 sum += PetscAbsScalar(*v); v++; 1946 } 1947 if (sum > ntemp) ntemp = sum; 1948 } 1949 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1950 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1951 } 1952 PetscFunctionReturn(0); 1953 } 1954 1955 #undef __FUNCT__ 1956 #define __FUNCT__ "MatTranspose_MPIAIJ" 1957 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1958 { 1959 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1960 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1961 PetscErrorCode ierr; 1962 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1963 PetscInt cstart = A->cmap->rstart,ncol; 1964 Mat B; 1965 MatScalar *array; 1966 1967 PetscFunctionBegin; 1968 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1969 1970 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1971 ai = Aloc->i; aj = Aloc->j; 1972 bi = Bloc->i; bj = Bloc->j; 1973 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1974 PetscInt *d_nnz,*g_nnz,*o_nnz; 1975 PetscSFNode *oloc; 1976 PETSC_UNUSED PetscSF sf; 1977 1978 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1979 /* compute d_nnz for preallocation */ 1980 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1981 for (i=0; i<ai[ma]; i++) { 1982 d_nnz[aj[i]]++; 1983 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1984 } 1985 /* compute local off-diagonal contributions */ 1986 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1987 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1988 /* map those to global */ 1989 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1990 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1991 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1992 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1993 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1994 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1995 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1996 1997 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1998 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1999 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2000 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2001 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2002 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2003 } else { 2004 B = *matout; 2005 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2006 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2007 } 2008 2009 /* copy over the A part */ 2010 array = Aloc->a; 2011 row = A->rmap->rstart; 2012 for (i=0; i<ma; i++) { 2013 ncol = ai[i+1]-ai[i]; 2014 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2015 row++; 2016 array += ncol; aj += ncol; 2017 } 2018 aj = Aloc->j; 2019 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2020 2021 /* copy over the B part */ 2022 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2023 array = Bloc->a; 2024 row = A->rmap->rstart; 2025 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2026 cols_tmp = cols; 2027 for (i=0; i<mb; i++) { 2028 ncol = bi[i+1]-bi[i]; 2029 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2030 row++; 2031 array += ncol; cols_tmp += ncol; 2032 } 2033 ierr = PetscFree(cols);CHKERRQ(ierr); 2034 2035 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2036 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2037 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2038 *matout = B; 2039 } else { 2040 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2041 } 2042 PetscFunctionReturn(0); 2043 } 2044 2045 #undef __FUNCT__ 2046 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2047 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2048 { 2049 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2050 Mat a = aij->A,b = aij->B; 2051 PetscErrorCode ierr; 2052 PetscInt s1,s2,s3; 2053 2054 PetscFunctionBegin; 2055 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2056 if (rr) { 2057 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2058 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2059 /* Overlap communication with computation. 
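       The forward scatter of rr into aij->lvec is started here and completed with VecScatterEnd()
       below, after the left scaling of the off-diagonal block and the scaling of the diagonal
       block, so the communication is hidden behind that local work.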
*/ 2060 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2061 } 2062 if (ll) { 2063 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2064 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2065 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2066 } 2067 /* scale the diagonal block */ 2068 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2069 2070 if (rr) { 2071 /* Do a scatter end and then right scale the off-diagonal block */ 2072 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2073 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2080 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2081 { 2082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2083 PetscErrorCode ierr; 2084 2085 PetscFunctionBegin; 2086 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2087 PetscFunctionReturn(0); 2088 } 2089 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "MatEqual_MPIAIJ" 2092 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2093 { 2094 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2095 Mat a,b,c,d; 2096 PetscBool flg; 2097 PetscErrorCode ierr; 2098 2099 PetscFunctionBegin; 2100 a = matA->A; b = matA->B; 2101 c = matB->A; d = matB->B; 2102 2103 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2104 if (flg) { 2105 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2106 } 2107 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2108 PetscFunctionReturn(0); 2109 } 2110 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "MatCopy_MPIAIJ" 2113 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2114 { 2115 PetscErrorCode ierr; 2116 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2117 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2118 2119 PetscFunctionBegin; 2120 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2121 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2122 /* because of the column compression in the off-processor part of the matrix a->B, 2123 the number of columns in a->B and b->B may be different, hence we cannot call 2124 the MatCopy() directly on the two parts. If need be, we can provide a more 2125 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2126 then copying the submatrices */ 2127 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2128 } else { 2129 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2130 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2131 } 2132 PetscFunctionReturn(0); 2133 } 2134 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatSetUp_MPIAIJ" 2137 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2138 { 2139 PetscErrorCode ierr; 2140 2141 PetscFunctionBegin; 2142 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 /* 2147 Computes the number of nonzeros per row needed for preallocation when X and Y 2148 have different nonzero structure. 
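   For each row the (sorted) global column indices of X and Y are merged and the size of their
   union is counted.  For example, if row i of X has global columns {0,3,7} and row i of Y has
   global columns {3,5}, then nnz[i] = |{0,3,5,7}| = 4.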
2149 */ 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2152 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2153 { 2154 PetscInt i,j,k,nzx,nzy; 2155 2156 PetscFunctionBegin; 2157 /* Set the number of nonzeros in the new matrix */ 2158 for (i=0; i<m; i++) { 2159 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2160 nzx = xi[i+1] - xi[i]; 2161 nzy = yi[i+1] - yi[i]; 2162 nnz[i] = 0; 2163 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2164 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2165 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2166 nnz[i]++; 2167 } 2168 for (; k<nzy; k++) nnz[i]++; 2169 } 2170 PetscFunctionReturn(0); 2171 } 2172 2173 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2176 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2177 { 2178 PetscErrorCode ierr; 2179 PetscInt m = Y->rmap->N; 2180 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2181 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2182 2183 PetscFunctionBegin; 2184 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "MatAXPY_MPIAIJ" 2190 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2191 { 2192 PetscErrorCode ierr; 2193 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2194 PetscBLASInt bnz,one=1; 2195 Mat_SeqAIJ *x,*y; 2196 2197 PetscFunctionBegin; 2198 if (str == SAME_NONZERO_PATTERN) { 2199 PetscScalar alpha = a; 2200 x = (Mat_SeqAIJ*)xx->A->data; 2201 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2202 y = (Mat_SeqAIJ*)yy->A->data; 2203 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2204 x = (Mat_SeqAIJ*)xx->B->data; 2205 y = (Mat_SeqAIJ*)yy->B->data; 2206 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2207 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2208 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2209 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2210 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2211 } else { 2212 Mat B; 2213 PetscInt *nnz_d,*nnz_o; 2214 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2215 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2216 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2217 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2218 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2219 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2220 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2221 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2222 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2223 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2224 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2225 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2226 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2227 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2228 } 2229 
PetscFunctionReturn(0); 2230 } 2231 2232 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2233 2234 #undef __FUNCT__ 2235 #define __FUNCT__ "MatConjugate_MPIAIJ" 2236 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2237 { 2238 #if defined(PETSC_USE_COMPLEX) 2239 PetscErrorCode ierr; 2240 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2241 2242 PetscFunctionBegin; 2243 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2244 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2245 #else 2246 PetscFunctionBegin; 2247 #endif 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatRealPart_MPIAIJ" 2253 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2254 { 2255 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2256 PetscErrorCode ierr; 2257 2258 PetscFunctionBegin; 2259 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2260 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2261 PetscFunctionReturn(0); 2262 } 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2266 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2267 { 2268 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2269 PetscErrorCode ierr; 2270 2271 PetscFunctionBegin; 2272 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2273 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2274 PetscFunctionReturn(0); 2275 } 2276 2277 #undef __FUNCT__ 2278 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2279 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2280 { 2281 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2282 PetscErrorCode ierr; 2283 PetscInt i,*idxb = 0; 2284 PetscScalar *va,*vb; 2285 Vec vtmp; 2286 2287 PetscFunctionBegin; 2288 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2289 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2290 if (idx) { 2291 for (i=0; i<A->rmap->n; i++) { 2292 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2293 } 2294 } 2295 2296 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2297 if (idx) { 2298 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2299 } 2300 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2301 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2302 2303 for (i=0; i<A->rmap->n; i++) { 2304 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2305 va[i] = vb[i]; 2306 if (idx) idx[i] = a->garray[idxb[i]]; 2307 } 2308 } 2309 2310 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2311 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2312 ierr = PetscFree(idxb);CHKERRQ(ierr); 2313 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2319 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2320 { 2321 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2322 PetscErrorCode ierr; 2323 PetscInt i,*idxb = 0; 2324 PetscScalar *va,*vb; 2325 Vec vtmp; 2326 2327 PetscFunctionBegin; 2328 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2329 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2330 if (idx) { 2331 for (i=0; i<A->cmap->n; i++) { 2332 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2333 } 2334 } 2335 2336 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2337 if (idx) { 2338 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2339 } 2340 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2341 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2342 2343 for (i=0; i<A->rmap->n; i++) { 2344 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2345 va[i] = vb[i]; 2346 if (idx) idx[i] = a->garray[idxb[i]]; 2347 } 2348 } 2349 2350 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2351 ierr = 
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  /* The local blocks mat->A and mat->B are sequential matrices, so the work vectors must live
     on PETSC_COMM_SELF (as in MatGetRowMax_MPIAIJ), not on the parallel communicator of A */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIAIJ"
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
/*@
    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap

    Collective on Mat

    Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE to use the scalable algorithm (the default is the non-scalable algorithm)

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_MPIAIJ"
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptions *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  ierr = PetscObjectOptionsBegin((PetscObject)A);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatShift_MPIAIJ"
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  /* ensure each local row of the diagonal block has room for at least one entry before shifting */
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2543 } 2544 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2545 PetscFunctionReturn(0); 2546 } 2547 2548 /* -------------------------------------------------------------------*/ 2549 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2550 MatGetRow_MPIAIJ, 2551 MatRestoreRow_MPIAIJ, 2552 MatMult_MPIAIJ, 2553 /* 4*/ MatMultAdd_MPIAIJ, 2554 MatMultTranspose_MPIAIJ, 2555 MatMultTransposeAdd_MPIAIJ, 2556 0, 2557 0, 2558 0, 2559 /*10*/ 0, 2560 0, 2561 0, 2562 MatSOR_MPIAIJ, 2563 MatTranspose_MPIAIJ, 2564 /*15*/ MatGetInfo_MPIAIJ, 2565 MatEqual_MPIAIJ, 2566 MatGetDiagonal_MPIAIJ, 2567 MatDiagonalScale_MPIAIJ, 2568 MatNorm_MPIAIJ, 2569 /*20*/ MatAssemblyBegin_MPIAIJ, 2570 MatAssemblyEnd_MPIAIJ, 2571 MatSetOption_MPIAIJ, 2572 MatZeroEntries_MPIAIJ, 2573 /*24*/ MatZeroRows_MPIAIJ, 2574 0, 2575 0, 2576 0, 2577 0, 2578 /*29*/ MatSetUp_MPIAIJ, 2579 0, 2580 0, 2581 0, 2582 0, 2583 /*34*/ MatDuplicate_MPIAIJ, 2584 0, 2585 0, 2586 0, 2587 0, 2588 /*39*/ MatAXPY_MPIAIJ, 2589 MatGetSubMatrices_MPIAIJ, 2590 MatIncreaseOverlap_MPIAIJ, 2591 MatGetValues_MPIAIJ, 2592 MatCopy_MPIAIJ, 2593 /*44*/ MatGetRowMax_MPIAIJ, 2594 MatScale_MPIAIJ, 2595 MatShift_MPIAIJ, 2596 MatDiagonalSet_MPIAIJ, 2597 MatZeroRowsColumns_MPIAIJ, 2598 /*49*/ MatSetRandom_MPIAIJ, 2599 0, 2600 0, 2601 0, 2602 0, 2603 /*54*/ MatFDColoringCreate_MPIXAIJ, 2604 0, 2605 MatSetUnfactored_MPIAIJ, 2606 MatPermute_MPIAIJ, 2607 0, 2608 /*59*/ MatGetSubMatrix_MPIAIJ, 2609 MatDestroy_MPIAIJ, 2610 MatView_MPIAIJ, 2611 0, 2612 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2613 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2614 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2615 0, 2616 0, 2617 0, 2618 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2619 MatGetRowMinAbs_MPIAIJ, 2620 0, 2621 MatSetColoring_MPIAIJ, 2622 0, 2623 MatSetValuesAdifor_MPIAIJ, 2624 /*75*/ MatFDColoringApply_AIJ, 2625 MatSetFromOptions_MPIAIJ, 2626 0, 2627 0, 2628 MatFindZeroDiagonals_MPIAIJ, 2629 /*80*/ 0, 2630 0, 2631 0, 2632 /*83*/ MatLoad_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 0, 2637 0, 2638 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2639 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2640 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2641 MatPtAP_MPIAIJ_MPIAIJ, 2642 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2643 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /*99*/ 0, 2649 0, 2650 0, 2651 MatConjugate_MPIAIJ, 2652 0, 2653 /*104*/MatSetValuesRow_MPIAIJ, 2654 MatRealPart_MPIAIJ, 2655 MatImaginaryPart_MPIAIJ, 2656 0, 2657 0, 2658 /*109*/0, 2659 0, 2660 MatGetRowMin_MPIAIJ, 2661 0, 2662 0, 2663 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2664 0, 2665 MatGetGhosts_MPIAIJ, 2666 0, 2667 0, 2668 /*119*/0, 2669 0, 2670 0, 2671 0, 2672 MatGetMultiProcBlock_MPIAIJ, 2673 /*124*/MatFindNonzeroRows_MPIAIJ, 2674 MatGetColumnNorms_MPIAIJ, 2675 MatInvertBlockDiagonal_MPIAIJ, 2676 0, 2677 MatGetSubMatricesMPI_MPIAIJ, 2678 /*129*/0, 2679 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2680 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2681 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2682 0, 2683 /*134*/0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*139*/0, 2689 0, 2690 0, 2691 MatFDColoringSetUp_MPIXAIJ, 2692 MatFindOffBlockDiagonalEntries_MPIAIJ, 2693 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2694 }; 2695 2696 /* ----------------------------------------------------------------------------------------*/ 2697 2698 #undef __FUNCT__ 2699 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2700 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2701 { 2702 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2703 PetscErrorCode ierr; 2704 2705 PetscFunctionBegin; 2706 ierr = 
MatStoreValues(aij->A);CHKERRQ(ierr); 2707 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2708 PetscFunctionReturn(0); 2709 } 2710 2711 #undef __FUNCT__ 2712 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2713 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2714 { 2715 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2716 PetscErrorCode ierr; 2717 2718 PetscFunctionBegin; 2719 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2720 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2721 PetscFunctionReturn(0); 2722 } 2723 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2726 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2727 { 2728 Mat_MPIAIJ *b; 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2733 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2734 b = (Mat_MPIAIJ*)B->data; 2735 2736 if (!B->preallocated) { 2737 /* Explicitly create 2 MATSEQAIJ matrices. */ 2738 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2739 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2740 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2741 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2742 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2743 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2744 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2745 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2746 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2747 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2748 } 2749 2750 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2751 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2752 B->preallocated = PETSC_TRUE; 2753 PetscFunctionReturn(0); 2754 } 2755 2756 #undef __FUNCT__ 2757 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2758 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2759 { 2760 Mat mat; 2761 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2762 PetscErrorCode ierr; 2763 2764 PetscFunctionBegin; 2765 *newmat = 0; 2766 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2767 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2768 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2769 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2770 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2771 a = (Mat_MPIAIJ*)mat->data; 2772 2773 mat->factortype = matin->factortype; 2774 mat->assembled = PETSC_TRUE; 2775 mat->insertmode = NOT_SET_VALUES; 2776 mat->preallocated = PETSC_TRUE; 2777 2778 a->size = oldmat->size; 2779 a->rank = oldmat->rank; 2780 a->donotstash = oldmat->donotstash; 2781 a->roworiented = oldmat->roworiented; 2782 a->rowindices = 0; 2783 a->rowvalues = 0; 2784 a->getrowactive = PETSC_FALSE; 2785 2786 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2787 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2788 2789 if (oldmat->colmap) { 2790 #if defined(PETSC_USE_CTABLE) 2791 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2792 #else 2793 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2794 ierr = 
PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2795 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2796 #endif 2797 } else a->colmap = 0; 2798 if (oldmat->garray) { 2799 PetscInt len; 2800 len = oldmat->B->cmap->n; 2801 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2802 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2803 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2804 } else a->garray = 0; 2805 2806 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2807 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2808 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2809 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2810 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2811 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2812 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2813 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2814 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2815 *newmat = mat; 2816 PetscFunctionReturn(0); 2817 } 2818 2819 2820 2821 #undef __FUNCT__ 2822 #define __FUNCT__ "MatLoad_MPIAIJ" 2823 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2824 { 2825 PetscScalar *vals,*svals; 2826 MPI_Comm comm; 2827 PetscErrorCode ierr; 2828 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2829 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2830 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2831 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2832 PetscInt cend,cstart,n,*rowners; 2833 int fd; 2834 PetscInt bs = newMat->rmap->bs; 2835 2836 PetscFunctionBegin; 2837 /* force binary viewer to load .info file if it has not yet done so */ 2838 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2839 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2840 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2841 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2842 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2843 if (!rank) { 2844 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2845 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2846 } 2847 2848 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 2849 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2850 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2851 if (bs < 0) bs = 1; 2852 2853 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2854 M = header[1]; N = header[2]; 2855 2856 /* If global sizes are set, check if they are consistent with that given in the file */ 2857 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2858 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2859 2860 /* determine ownership of all (block) rows */ 2861 if (M%bs) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2862 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2863 else m = newMat->rmap->n; /* Set by user */ 2864 2865 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2866 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2867 2868 /* First process needs enough room for process with most rows */ 2869 if (!rank) { 2870 mmax = rowners[1]; 2871 for (i=2; i<=size; i++) { 2872 mmax = PetscMax(mmax, rowners[i]); 2873 } 2874 } else mmax = -1; /* unused, but compilers complain */ 2875 2876 rowners[0] = 0; 2877 for (i=2; i<=size; i++) { 2878 rowners[i] += rowners[i-1]; 2879 } 2880 rstart = rowners[rank]; 2881 rend = rowners[rank+1]; 2882 2883 /* distribute row lengths to all processors */ 2884 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2885 if (!rank) { 2886 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2887 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2888 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2889 for (j=0; j<m; j++) { 2890 procsnz[0] += ourlens[j]; 2891 } 2892 for (i=1; i<size; i++) { 2893 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2894 /* calculate the number of nonzeros on each processor */ 2895 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2896 procsnz[i] += rowlengths[j]; 2897 } 2898 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2899 } 2900 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2901 } else { 2902 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2903 } 2904 2905 if (!rank) { 2906 /* determine max buffer needed and allocate it */ 2907 maxnz = 0; 2908 for (i=0; i<size; i++) { 2909 maxnz = PetscMax(maxnz,procsnz[i]); 2910 } 2911 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2912 2913 /* read in my part of the matrix column indices */ 2914 nz = procsnz[0]; 2915 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2916 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2917 2918 /* read in every one elses and ship off */ 2919 for (i=1; i<size; i++) { 2920 nz = procsnz[i]; 2921 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2922 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2923 } 2924 ierr = PetscFree(cols);CHKERRQ(ierr); 2925 } else { 2926 /* determine buffer space needed for message */ 2927 nz = 0; 2928 for (i=0; i<m; i++) { 2929 nz += ourlens[i]; 2930 } 2931 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2932 2933 /* receive message of column indices*/ 2934 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2935 } 2936 2937 /* determine column ownership if matrix is not square */ 2938 if (N != M) { 2939 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2940 else n = newMat->cmap->n; 2941 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2942 cstart = cend - n; 2943 } else { 2944 cstart = rstart; 2945 cend = rend; 2946 n = cend - cstart; 2947 } 2948 2949 /* loop over local rows, determining number of off diagonal entries */ 2950 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2951 jj = 0; 2952 for (i=0; i<m; i++) { 2953 for (j=0; j<ourlens[i]; j++) { 2954 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2955 jj++; 2956 } 2957 } 2958 2959 for (i=0; i<m; i++) { 2960 ourlens[i] -= offlens[i]; 2961 } 2962 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2963 
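  /* At this point ourlens[] holds only the diagonal-block entry counts and offlens[] the
     off-diagonal counts; preallocate with these, then restore ourlens[] to the full row
     lengths for the value-insertion loops below. */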
2964 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2965 2966 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2967 2968 for (i=0; i<m; i++) { 2969 ourlens[i] += offlens[i]; 2970 } 2971 2972 if (!rank) { 2973 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2974 2975 /* read in my part of the matrix numerical values */ 2976 nz = procsnz[0]; 2977 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2978 2979 /* insert into matrix */ 2980 jj = rstart; 2981 smycols = mycols; 2982 svals = vals; 2983 for (i=0; i<m; i++) { 2984 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2985 smycols += ourlens[i]; 2986 svals += ourlens[i]; 2987 jj++; 2988 } 2989 2990 /* read in other processors and ship out */ 2991 for (i=1; i<size; i++) { 2992 nz = procsnz[i]; 2993 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2994 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2995 } 2996 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2997 } else { 2998 /* receive numeric values */ 2999 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3000 3001 /* receive message of values*/ 3002 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3003 3004 /* insert into matrix */ 3005 jj = rstart; 3006 smycols = mycols; 3007 svals = vals; 3008 for (i=0; i<m; i++) { 3009 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3010 smycols += ourlens[i]; 3011 svals += ourlens[i]; 3012 jj++; 3013 } 3014 } 3015 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3016 ierr = PetscFree(vals);CHKERRQ(ierr); 3017 ierr = PetscFree(mycols);CHKERRQ(ierr); 3018 ierr = PetscFree(rowners);CHKERRQ(ierr); 3019 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3020 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3021 PetscFunctionReturn(0); 3022 } 3023 3024 #undef __FUNCT__ 3025 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3026 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
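      ISAllGather() concatenates every process's piece of iscol onto every process, so per-process
      memory grows with the global number of selected columns.

      A minimal calling sketch (illustrative only; the index sets below are made up and simply
      select each process's own rows and the matching block of columns):

        Mat      sub;
        IS       isrow,iscol;
        PetscInt rstart,rend;
        ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
        ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
        ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
        ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
        (after the values of mat change, the same index sets and submatrix can be reused)
        ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
        ierr = MatDestroy(&sub);CHKERRQ(ierr);
        ierr = ISDestroy(&isrow);CHKERRQ(ierr);
        ierr = ISDestroy(&iscol);CHKERRQ(ierr);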
*/ 3027 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3028 { 3029 PetscErrorCode ierr; 3030 IS iscol_local; 3031 PetscInt csize; 3032 3033 PetscFunctionBegin; 3034 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3035 if (call == MAT_REUSE_MATRIX) { 3036 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3037 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3038 } else { 3039 /* check if we are grabbing all columns*/ 3040 PetscBool isstride; 3041 PetscMPIInt lisstride = 0,gisstride; 3042 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3043 if (isstride) { 3044 PetscInt start,len,mstart,mlen; 3045 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3046 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3047 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3048 if (mstart == start && mlen-mstart == len) lisstride = 1; 3049 } 3050 ierr = MPI_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3051 if (gisstride) { 3052 PetscInt N; 3053 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3054 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3055 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3056 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3057 } else { 3058 PetscInt cbs; 3059 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3060 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3061 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3062 } 3063 } 3064 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3065 if (call == MAT_INITIAL_MATRIX) { 3066 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3067 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3068 } 3069 PetscFunctionReturn(0); 3070 } 3071 3072 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3075 /* 3076 Not great since it makes two copies of the submatrix, first an SeqAIJ 3077 in local and then by concatenating the local matrices the end result. 3078 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3079 3080 Note: This requires a sequential iscol with all indices. 
3081 */ 3082 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3083 { 3084 PetscErrorCode ierr; 3085 PetscMPIInt rank,size; 3086 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3087 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3088 PetscBool allcolumns, colflag; 3089 Mat M,Mreuse; 3090 MatScalar *vwork,*aa; 3091 MPI_Comm comm; 3092 Mat_SeqAIJ *aij; 3093 3094 PetscFunctionBegin; 3095 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3096 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3097 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3098 3099 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3100 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3101 if (colflag && ncol == mat->cmap->N) { 3102 allcolumns = PETSC_TRUE; 3103 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3104 } else { 3105 allcolumns = PETSC_FALSE; 3106 } 3107 if (call == MAT_REUSE_MATRIX) { 3108 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3109 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3110 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3111 } else { 3112 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3113 } 3114 3115 /* 3116 m - number of local rows 3117 n - number of columns (same on all processors) 3118 rstart - first row in new global matrix generated 3119 */ 3120 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3121 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3122 if (call == MAT_INITIAL_MATRIX) { 3123 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3124 ii = aij->i; 3125 jj = aij->j; 3126 3127 /* 3128 Determine the number of non-zeros in the diagonal and off-diagonal 3129 portions of the matrix in order to do correct preallocation 3130 */ 3131 3132 /* first get start and end of "diagonal" columns */ 3133 if (csize == PETSC_DECIDE) { 3134 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3135 if (mglobal == n) { /* square matrix */ 3136 nlocal = m; 3137 } else { 3138 nlocal = n/size + ((n % size) > rank); 3139 } 3140 } else { 3141 nlocal = csize; 3142 } 3143 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3144 rstart = rend - nlocal; 3145 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3146 3147 /* next, compute all the lengths */ 3148 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3149 olens = dlens + m; 3150 for (i=0; i<m; i++) { 3151 jend = ii[i+1] - ii[i]; 3152 olen = 0; 3153 dlen = 0; 3154 for (j=0; j<jend; j++) { 3155 if (*jj < rstart || *jj >= rend) olen++; 3156 else dlen++; 3157 jj++; 3158 } 3159 olens[i] = olen; 3160 dlens[i] = dlen; 3161 } 3162 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3163 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3164 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3165 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3166 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3167 ierr = PetscFree(dlens);CHKERRQ(ierr); 3168 } else { 3169 PetscInt ml,nl; 3170 3171 M = *newmat; 3172 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3173 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request");
3174 ierr = MatZeroEntries(M);CHKERRQ(ierr);
3175 /*
3176 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3177 rather than the slower MatSetValues().
3178 */
3179 M->was_assembled = PETSC_TRUE;
3180 M->assembled = PETSC_FALSE;
3181 }
3182 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3183 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3184 ii = aij->i;
3185 jj = aij->j;
3186 aa = aij->a;
3187 for (i=0; i<m; i++) {
3188 row = rstart + i;
3189 nz = ii[i+1] - ii[i];
3190 cwork = jj; jj += nz;
3191 vwork = aa; aa += nz;
3192 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3193 }
3194
3195 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3196 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3197 *newmat = M;
3198
3199 /* save submatrix used in processor for next request */
3200 if (call == MAT_INITIAL_MATRIX) {
3201 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3202 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3203 }
3204 PetscFunctionReturn(0);
3205 }
3206
3207 #undef __FUNCT__
3208 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3209 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3210 {
3211 PetscInt m,cstart,cend,j,nnz,i,d;
3212 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3213 const PetscInt *JJ;
3214 PetscScalar *values;
3215 PetscErrorCode ierr;
3216
3217 PetscFunctionBegin;
3218 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3219
3220 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3221 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3222 m = B->rmap->n;
3223 cstart = B->cmap->rstart;
3224 cend = B->cmap->rend;
3225 rstart = B->rmap->rstart;
3226
3227 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3228
3229 #if defined(PETSC_USE_DEBUG)
3230 for (i=0; i<m; i++) {
3231 nnz = Ii[i+1] - Ii[i];
3232 JJ = J + Ii[i];
3233 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3234 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3235 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3236 }
3237 #endif
3238
3239 for (i=0; i<m; i++) {
3240 nnz = Ii[i+1] - Ii[i];
3241 JJ = J + Ii[i];
3242 nnz_max = PetscMax(nnz_max,nnz);
3243 d = 0;
3244 for (j=0; j<nnz; j++) {
3245 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3246 }
3247 d_nnz[i] = d;
3248 o_nnz[i] = nnz - d;
3249 }
3250 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3251 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3252
3253 if (v) values = (PetscScalar*)v;
3254 else {
3255 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3256 }
3257
3258 for (i=0; i<m; i++) {
3259 ii = i + rstart;
3260 nnz = Ii[i+1] - Ii[i];
3261 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3262 } 3263 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 3266 if (!v) { 3267 ierr = PetscFree(values);CHKERRQ(ierr); 3268 } 3269 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3270 PetscFunctionReturn(0); 3271 } 3272 3273 #undef __FUNCT__ 3274 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3275 /*@ 3276 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3277 (the default parallel PETSc format). 3278 3279 Collective on MPI_Comm 3280 3281 Input Parameters: 3282 + B - the matrix 3283 . i - the indices into j for the start of each local row (starts with zero) 3284 . j - the column indices for each local row (starts with zero) 3285 - v - optional values in the matrix 3286 3287 Level: developer 3288 3289 Notes: 3290 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3291 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3292 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3293 3294 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3295 3296 The format which is used for the sparse matrix input, is equivalent to a 3297 row-major ordering.. i.e for the following matrix, the input data expected is 3298 as shown 3299 3300 $ 1 0 0 3301 $ 2 0 3 P0 3302 $ ------- 3303 $ 4 5 6 P1 3304 $ 3305 $ Process0 [P0]: rows_owned=[0,1] 3306 $ i = {0,1,3} [size = nrow+1 = 2+1] 3307 $ j = {0,0,2} [size = 3] 3308 $ v = {1,2,3} [size = 3] 3309 $ 3310 $ Process1 [P1]: rows_owned=[2] 3311 $ i = {0,3} [size = nrow+1 = 1+1] 3312 $ j = {0,1,2} [size = 3] 3313 $ v = {4,5,6} [size = 3] 3314 3315 .keywords: matrix, aij, compressed row, sparse, parallel 3316 3317 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3318 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3319 @*/ 3320 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3321 { 3322 PetscErrorCode ierr; 3323 3324 PetscFunctionBegin; 3325 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3326 PetscFunctionReturn(0); 3327 } 3328 3329 #undef __FUNCT__ 3330 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3331 /*@C 3332 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3333 (the default parallel PETSc format). For good matrix assembly performance 3334 the user should preallocate the matrix storage by setting the parameters 3335 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3336 performance can be increased by more than a factor of 50. 3337 3338 Collective on MPI_Comm 3339 3340 Input Parameters: 3341 + B - the matrix 3342 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3343 (same value is used for all local rows) 3344 . d_nnz - array containing the number of nonzeros in the various rows of the 3345 DIAGONAL portion of the local submatrix (possibly different for each row) 3346 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3347 The size of this array is equal to the number of local rows, i.e 'm'. 
3348 For matrices that will be factored, you must leave room for (and set) 3349 the diagonal entry even if it is zero. 3350 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3351 submatrix (same value is used for all local rows). 3352 - o_nnz - array containing the number of nonzeros in the various rows of the 3353 OFF-DIAGONAL portion of the local submatrix (possibly different for 3354 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3355 structure. The size of this array is equal to the number 3356 of local rows, i.e 'm'. 3357 3358 If the *_nnz parameter is given then the *_nz parameter is ignored 3359 3360 The AIJ format (also called the Yale sparse matrix format or 3361 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3362 storage. The stored row and column indices begin with zero. 3363 See Users-Manual: ch_mat for details. 3364 3365 The parallel matrix is partitioned such that the first m0 rows belong to 3366 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3367 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3368 3369 The DIAGONAL portion of the local submatrix of a processor can be defined 3370 as the submatrix which is obtained by extraction the part corresponding to 3371 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3372 first row that belongs to the processor, r2 is the last row belonging to 3373 the this processor, and c1-c2 is range of indices of the local part of a 3374 vector suitable for applying the matrix to. This is an mxn matrix. In the 3375 common case of a square matrix, the row and column ranges are the same and 3376 the DIAGONAL part is also square. The remaining portion of the local 3377 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3378 3379 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3380 3381 You can call MatGetInfo() to get information on how effective the preallocation was; 3382 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3383 You can also run with the option -info and look for messages with the string 3384 malloc in them to see if additional memory allocation was needed. 3385 3386 Example usage: 3387 3388 Consider the following 8x8 matrix with 34 non-zero values, that is 3389 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3390 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3391 as follows: 3392 3393 .vb 3394 1 2 0 | 0 3 0 | 0 4 3395 Proc0 0 5 6 | 7 0 0 | 8 0 3396 9 0 10 | 11 0 0 | 12 0 3397 ------------------------------------- 3398 13 0 14 | 15 16 17 | 0 0 3399 Proc1 0 18 0 | 19 20 21 | 0 0 3400 0 0 0 | 22 23 0 | 24 0 3401 ------------------------------------- 3402 Proc2 25 26 27 | 0 0 28 | 29 0 3403 30 0 0 | 31 32 33 | 0 34 3404 .ve 3405 3406 This can be represented as a collection of submatrices as: 3407 3408 .vb 3409 A B C 3410 D E F 3411 G H I 3412 .ve 3413 3414 Where the submatrices A,B,C are owned by proc0, D,E,F are 3415 owned by proc1, G,H,I are owned by proc2. 3416 3417 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3418 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3419 The 'M','N' parameters are 8,8, and have the same values on all procs. 3420 3421 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3422 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3423 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
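   For illustration, proc0 of this example could preallocate with the per-row counts
   worked out in the discussion below (the variable names here are only a sketch):
.vb
   PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve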
3424 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3425 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3426 matrix, ans [DF] as another SeqAIJ matrix. 3427 3428 When d_nz, o_nz parameters are specified, d_nz storage elements are 3429 allocated for every row of the local diagonal submatrix, and o_nz 3430 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3431 One way to choose d_nz and o_nz is to use the max nonzerors per local 3432 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3433 In this case, the values of d_nz,o_nz are: 3434 .vb 3435 proc0 : dnz = 2, o_nz = 2 3436 proc1 : dnz = 3, o_nz = 2 3437 proc2 : dnz = 1, o_nz = 4 3438 .ve 3439 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3440 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3441 for proc3. i.e we are using 12+15+10=37 storage locations to store 3442 34 values. 3443 3444 When d_nnz, o_nnz parameters are specified, the storage is specified 3445 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3446 In the above case the values for d_nnz,o_nnz are: 3447 .vb 3448 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3449 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3450 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3451 .ve 3452 Here the space allocated is sum of all the above values i.e 34, and 3453 hence pre-allocation is perfect. 3454 3455 Level: intermediate 3456 3457 .keywords: matrix, aij, compressed row, sparse, parallel 3458 3459 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3460 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3461 @*/ 3462 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3463 { 3464 PetscErrorCode ierr; 3465 3466 PetscFunctionBegin; 3467 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3468 PetscValidType(B,1); 3469 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3470 PetscFunctionReturn(0); 3471 } 3472 3473 #undef __FUNCT__ 3474 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3475 /*@ 3476 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3477 CSR format the local rows. 3478 3479 Collective on MPI_Comm 3480 3481 Input Parameters: 3482 + comm - MPI communicator 3483 . m - number of local rows (Cannot be PETSC_DECIDE) 3484 . n - This value should be the same as the local size used in creating the 3485 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3486 calculated if N is given) For square matrices n is almost always m. 3487 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3488 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3489 . i - row indices 3490 . j - column indices 3491 - a - matrix values 3492 3493 Output Parameter: 3494 . mat - the matrix 3495 3496 Level: intermediate 3497 3498 Notes: 3499 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3500 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3501 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3502 3503 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
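   As an illustrative sketch (hypothetical variable names, using the Process0 data from
   the example below):
$     PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};
$     PetscScalar v0[] = {1.0,2.0,3.0};
$     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,v0,&A);CHKERRQ(ierr);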
3504 3505 The format which is used for the sparse matrix input, is equivalent to a 3506 row-major ordering.. i.e for the following matrix, the input data expected is 3507 as shown 3508 3509 $ 1 0 0 3510 $ 2 0 3 P0 3511 $ ------- 3512 $ 4 5 6 P1 3513 $ 3514 $ Process0 [P0]: rows_owned=[0,1] 3515 $ i = {0,1,3} [size = nrow+1 = 2+1] 3516 $ j = {0,0,2} [size = 3] 3517 $ v = {1,2,3} [size = 3] 3518 $ 3519 $ Process1 [P1]: rows_owned=[2] 3520 $ i = {0,3} [size = nrow+1 = 1+1] 3521 $ j = {0,1,2} [size = 3] 3522 $ v = {4,5,6} [size = 3] 3523 3524 .keywords: matrix, aij, compressed row, sparse, parallel 3525 3526 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3527 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3528 @*/ 3529 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3530 { 3531 PetscErrorCode ierr; 3532 3533 PetscFunctionBegin; 3534 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3535 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3536 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3537 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3538 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3539 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3540 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3541 PetscFunctionReturn(0); 3542 } 3543 3544 #undef __FUNCT__ 3545 #define __FUNCT__ "MatCreateAIJ" 3546 /*@C 3547 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3548 (the default parallel PETSc format). For good matrix assembly performance 3549 the user should preallocate the matrix storage by setting the parameters 3550 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3551 performance can be increased by more than a factor of 50. 3552 3553 Collective on MPI_Comm 3554 3555 Input Parameters: 3556 + comm - MPI communicator 3557 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3558 This value should be the same as the local size used in creating the 3559 y vector for the matrix-vector product y = Ax. 3560 . n - This value should be the same as the local size used in creating the 3561 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3562 calculated if N is given) For square matrices n is almost always m. 3563 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3564 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3565 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3566 (same value is used for all local rows) 3567 . d_nnz - array containing the number of nonzeros in the various rows of the 3568 DIAGONAL portion of the local submatrix (possibly different for each row) 3569 or NULL, if d_nz is used to specify the nonzero structure. 3570 The size of this array is equal to the number of local rows, i.e 'm'. 3571 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3572 submatrix (same value is used for all local rows). 3573 - o_nnz - array containing the number of nonzeros in the various rows of the 3574 OFF-DIAGONAL portion of the local submatrix (possibly different for 3575 each row) or NULL, if o_nz is used to specify the nonzero 3576 structure. 
The size of this array is equal to the number 3577 of local rows, i.e 'm'. 3578 3579 Output Parameter: 3580 . A - the matrix 3581 3582 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3583 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3584 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3585 3586 Notes: 3587 If the *_nnz parameter is given then the *_nz parameter is ignored 3588 3589 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3590 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3591 storage requirements for this matrix. 3592 3593 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3594 processor than it must be used on all processors that share the object for 3595 that argument. 3596 3597 The user MUST specify either the local or global matrix dimensions 3598 (possibly both). 3599 3600 The parallel matrix is partitioned across processors such that the 3601 first m0 rows belong to process 0, the next m1 rows belong to 3602 process 1, the next m2 rows belong to process 2 etc.. where 3603 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3604 values corresponding to [m x N] submatrix. 3605 3606 The columns are logically partitioned with the n0 columns belonging 3607 to 0th partition, the next n1 columns belonging to the next 3608 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3609 3610 The DIAGONAL portion of the local submatrix on any given processor 3611 is the submatrix corresponding to the rows and columns m,n 3612 corresponding to the given processor. i.e diagonal matrix on 3613 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3614 etc. The remaining portion of the local submatrix [m x (N-n)] 3615 constitute the OFF-DIAGONAL portion. The example below better 3616 illustrates this concept. 3617 3618 For a square global matrix we define each processor's diagonal portion 3619 to be its local rows and the corresponding columns (a square submatrix); 3620 each processor's off-diagonal portion encompasses the remainder of the 3621 local matrix (a rectangular submatrix). 3622 3623 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3624 3625 When calling this routine with a single process communicator, a matrix of 3626 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3627 type of communicator, use the construction mechanism: 3628 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3629 3630 By default, this format uses inodes (identical nodes) when possible. 3631 We search for consecutive rows with the same nonzero structure, thereby 3632 reusing matrix information to achieve increased efficiency. 3633 3634 Options Database Keys: 3635 + -mat_no_inode - Do not use inodes 3636 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3637 - -mat_aij_oneindex - Internally use indexing starting at 1 3638 rather than 0. Note that when calling MatSetValues(), 3639 the user still MUST index entries starting at 0! 3640 3641 3642 Example usage: 3643 3644 Consider the following 8x8 matrix with 34 non-zero values, that is 3645 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3646 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3647 as follows: 3648 3649 .vb 3650 1 2 0 | 0 3 0 | 0 4 3651 Proc0 0 5 6 | 7 0 0 | 8 0 3652 9 0 10 | 11 0 0 | 12 0 3653 ------------------------------------- 3654 13 0 14 | 15 16 17 | 0 0 3655 Proc1 0 18 0 | 19 20 21 | 0 0 3656 0 0 0 | 22 23 0 | 24 0 3657 ------------------------------------- 3658 Proc2 25 26 27 | 0 0 28 | 29 0 3659 30 0 0 | 31 32 33 | 0 34 3660 .ve 3661 3662 This can be represented as a collection of submatrices as: 3663 3664 .vb 3665 A B C 3666 D E F 3667 G H I 3668 .ve 3669 3670 Where the submatrices A,B,C are owned by proc0, D,E,F are 3671 owned by proc1, G,H,I are owned by proc2. 3672 3673 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3674 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3675 The 'M','N' parameters are 8,8, and have the same values on all procs. 3676 3677 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3678 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3679 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3680 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3681 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3682 matrix, ans [DF] as another SeqAIJ matrix. 3683 3684 When d_nz, o_nz parameters are specified, d_nz storage elements are 3685 allocated for every row of the local diagonal submatrix, and o_nz 3686 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3687 One way to choose d_nz and o_nz is to use the max nonzerors per local 3688 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3689 In this case, the values of d_nz,o_nz are: 3690 .vb 3691 proc0 : dnz = 2, o_nz = 2 3692 proc1 : dnz = 3, o_nz = 2 3693 proc2 : dnz = 1, o_nz = 4 3694 .ve 3695 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3696 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3697 for proc3. i.e we are using 12+15+10=37 storage locations to store 3698 34 values. 3699 3700 When d_nnz, o_nnz parameters are specified, the storage is specified 3701 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3702 In the above case the values for d_nnz,o_nnz are: 3703 .vb 3704 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3705 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3706 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3707 .ve 3708 Here the space allocated is sum of all the above values i.e 34, and 3709 hence pre-allocation is perfect. 
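   For illustration, a call on proc0 of this example might look like the following
   sketch (the variable names are illustrative only):
.vb
   PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
   ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve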
3710 3711 Level: intermediate 3712 3713 .keywords: matrix, aij, compressed row, sparse, parallel 3714 3715 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3716 MPIAIJ, MatCreateMPIAIJWithArrays() 3717 @*/ 3718 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3719 { 3720 PetscErrorCode ierr; 3721 PetscMPIInt size; 3722 3723 PetscFunctionBegin; 3724 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3725 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3726 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3727 if (size > 1) { 3728 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3729 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3730 } else { 3731 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3732 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3733 } 3734 PetscFunctionReturn(0); 3735 } 3736 3737 #undef __FUNCT__ 3738 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3739 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3740 { 3741 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3742 3743 PetscFunctionBegin; 3744 if (Ad) *Ad = a->A; 3745 if (Ao) *Ao = a->B; 3746 if (colmap) *colmap = a->garray; 3747 PetscFunctionReturn(0); 3748 } 3749 3750 #undef __FUNCT__ 3751 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3752 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3753 { 3754 PetscErrorCode ierr; 3755 PetscInt i; 3756 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3757 3758 PetscFunctionBegin; 3759 if (coloring->ctype == IS_COLORING_GLOBAL) { 3760 ISColoringValue *allcolors,*colors; 3761 ISColoring ocoloring; 3762 3763 /* set coloring for diagonal portion */ 3764 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3765 3766 /* set coloring for off-diagonal portion */ 3767 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3768 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3769 for (i=0; i<a->B->cmap->n; i++) { 3770 colors[i] = allcolors[a->garray[i]]; 3771 } 3772 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3773 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3774 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3775 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3776 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3777 ISColoringValue *colors; 3778 PetscInt *larray; 3779 ISColoring ocoloring; 3780 3781 /* set coloring for diagonal portion */ 3782 ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr); 3783 for (i=0; i<a->A->cmap->n; i++) { 3784 larray[i] = i + A->cmap->rstart; 3785 } 3786 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3787 ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr); 3788 for (i=0; i<a->A->cmap->n; i++) { 3789 colors[i] = coloring->colors[larray[i]]; 3790 } 3791 ierr = PetscFree(larray);CHKERRQ(ierr); 3792 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3793 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3794 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3795 3796 /* set coloring for off-diagonal portion */ 3797 ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr); 3798 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3799 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3800 for (i=0; i<a->B->cmap->n; i++) { 3801 colors[i] = coloring->colors[larray[i]]; 3802 } 3803 ierr = PetscFree(larray);CHKERRQ(ierr); 3804 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3805 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3806 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3807 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3808 PetscFunctionReturn(0); 3809 } 3810 3811 #undef __FUNCT__ 3812 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3813 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3814 { 3815 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3816 PetscErrorCode ierr; 3817 3818 PetscFunctionBegin; 3819 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3820 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3821 PetscFunctionReturn(0); 3822 } 3823 3824 #undef __FUNCT__ 3825 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3826 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3827 { 3828 PetscErrorCode ierr; 3829 PetscInt m,N,i,rstart,nnz,Ii; 3830 PetscInt *indx; 3831 PetscScalar *values; 3832 3833 PetscFunctionBegin; 3834 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3835 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3836 PetscInt *dnz,*onz,sum,bs,cbs; 3837 3838 if (n == PETSC_DECIDE) { 3839 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3840 } 3841 /* Check sum(n) = N */ 3842 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3843 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3844 3845 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3846 rstart -= m; 3847 3848 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3849 for (i=0; i<m; i++) { 3850 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3851 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3852 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3853 } 3854 3855 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3856 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3857 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3858 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3859 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3860 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3861 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3862 } 3863 3864 /* numeric phase */ 3865 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3866 for (i=0; i<m; i++) { 3867 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3868 Ii = i + rstart; 3869 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3870 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3871 } 3872 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3873 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3874 PetscFunctionReturn(0); 3875 } 3876 3877 #undef __FUNCT__ 3878 #define __FUNCT__ "MatFileSplit" 3879 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3880 { 3881 PetscErrorCode 
ierr; 3882 PetscMPIInt rank; 3883 PetscInt m,N,i,rstart,nnz; 3884 size_t len; 3885 const PetscInt *indx; 3886 PetscViewer out; 3887 char *name; 3888 Mat B; 3889 const PetscScalar *values; 3890 3891 PetscFunctionBegin; 3892 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3893 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3894 /* Should this be the type of the diagonal block of A? */ 3895 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3896 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3897 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3898 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3899 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3900 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3901 for (i=0; i<m; i++) { 3902 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3903 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3904 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3905 } 3906 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3907 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3908 3909 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3910 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3911 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3912 sprintf(name,"%s.%d",outfile,rank); 3913 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3914 ierr = PetscFree(name);CHKERRQ(ierr); 3915 ierr = MatView(B,out);CHKERRQ(ierr); 3916 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3917 ierr = MatDestroy(&B);CHKERRQ(ierr); 3918 PetscFunctionReturn(0); 3919 } 3920 3921 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3922 #undef __FUNCT__ 3923 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3924 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3925 { 3926 PetscErrorCode ierr; 3927 Mat_Merge_SeqsToMPI *merge; 3928 PetscContainer container; 3929 3930 PetscFunctionBegin; 3931 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3932 if (container) { 3933 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3934 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3935 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3936 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3937 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3938 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3939 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3940 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3941 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3942 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3943 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3944 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3945 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3946 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3947 ierr = PetscFree(merge);CHKERRQ(ierr); 3948 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3949 } 3950 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3951 PetscFunctionReturn(0); 3952 } 3953 3954 #include <../src/mat/utils/freespace.h> 3955 #include <petscbt.h> 3956 3957 #undef __FUNCT__ 3958 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3959 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3960 { 3961 PetscErrorCode ierr; 3962 MPI_Comm comm; 3963 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3964 PetscMPIInt size,rank,taga,*len_s; 3965 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3966 PetscInt proc,m; 3967 PetscInt **buf_ri,**buf_rj; 3968 
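/* buf_ri[k]/buf_rj[k]: i- and j-structure of the k-th received message, saved by the symbolic phase */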
PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3969 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3970 MPI_Request *s_waits,*r_waits; 3971 MPI_Status *status; 3972 MatScalar *aa=a->a; 3973 MatScalar **abuf_r,*ba_i; 3974 Mat_Merge_SeqsToMPI *merge; 3975 PetscContainer container; 3976 3977 PetscFunctionBegin; 3978 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3979 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3980 3981 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3982 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3983 3984 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3985 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3986 3987 bi = merge->bi; 3988 bj = merge->bj; 3989 buf_ri = merge->buf_ri; 3990 buf_rj = merge->buf_rj; 3991 3992 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3993 owners = merge->rowmap->range; 3994 len_s = merge->len_s; 3995 3996 /* send and recv matrix values */ 3997 /*-----------------------------*/ 3998 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3999 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4000 4001 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4002 for (proc=0,k=0; proc<size; proc++) { 4003 if (!len_s[proc]) continue; 4004 i = owners[proc]; 4005 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4006 k++; 4007 } 4008 4009 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4010 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4011 ierr = PetscFree(status);CHKERRQ(ierr); 4012 4013 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4014 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4015 4016 /* insert mat values of mpimat */ 4017 /*----------------------------*/ 4018 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4019 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4020 4021 for (k=0; k<merge->nrecv; k++) { 4022 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4023 nrows = *(buf_ri_k[k]); 4024 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4025 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4026 } 4027 4028 /* set values of ba */ 4029 m = merge->rowmap->n; 4030 for (i=0; i<m; i++) { 4031 arow = owners[rank] + i; 4032 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4033 bnzi = bi[i+1] - bi[i]; 4034 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4035 4036 /* add local non-zero vals of this proc's seqmat into ba */ 4037 anzi = ai[arow+1] - ai[arow]; 4038 aj = a->j + ai[arow]; 4039 aa = a->a + ai[arow]; 4040 nextaj = 0; 4041 for (j=0; nextaj<anzi; j++) { 4042 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4043 ba_i[j] += aa[nextaj++]; 4044 } 4045 } 4046 4047 /* add received vals into ba */ 4048 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4049 /* i-th row */ 4050 if (i == *nextrow[k]) { 4051 anzi = *(nextai[k]+1) - *nextai[k]; 4052 aj = buf_rj[k] + *(nextai[k]); 4053 aa = abuf_r[k] + *(nextai[k]); 4054 nextaj = 0; 4055 for (j=0; nextaj<anzi; j++) { 4056 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4057 ba_i[j] += aa[nextaj++]; 4058 } 4059 } 4060 nextrow[k]++; nextai[k]++; 4061 } 4062 } 4063 ierr = 
MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4064 } 4065 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4066 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4067 4068 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4069 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4070 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4071 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4072 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4073 PetscFunctionReturn(0); 4074 } 4075 4076 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4077 4078 #undef __FUNCT__ 4079 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4080 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4081 { 4082 PetscErrorCode ierr; 4083 Mat B_mpi; 4084 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4085 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4086 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4087 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4088 PetscInt len,proc,*dnz,*onz,bs,cbs; 4089 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4090 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4091 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4092 MPI_Status *status; 4093 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4094 PetscBT lnkbt; 4095 Mat_Merge_SeqsToMPI *merge; 4096 PetscContainer container; 4097 4098 PetscFunctionBegin; 4099 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4100 4101 /* make sure it is a PETSc comm */ 4102 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4103 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4104 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4105 4106 ierr = PetscNew(&merge);CHKERRQ(ierr); 4107 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4108 4109 /* determine row ownership */ 4110 /*---------------------------------------------------------*/ 4111 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4112 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4113 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4114 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4115 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4116 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4117 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4118 4119 m = merge->rowmap->n; 4120 owners = merge->rowmap->range; 4121 4122 /* determine the number of messages to send, their lengths */ 4123 /*---------------------------------------------------------*/ 4124 len_s = merge->len_s; 4125 4126 len = 0; /* length of buf_si[] */ 4127 merge->nsend = 0; 4128 for (proc=0; proc<size; proc++) { 4129 len_si[proc] = 0; 4130 if (proc == rank) { 4131 len_s[proc] = 0; 4132 } else { 4133 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4134 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4135 } 4136 if (len_s[proc]) { 4137 merge->nsend++; 4138 nrows = 0; 4139 for (i=owners[proc]; i<owners[proc+1]; i++) { 4140 if (ai[i+1] > ai[i]) nrows++; 4141 } 4142 len_si[proc] = 2*(nrows+1); 4143 len += len_si[proc]; 4144 } 4145 } 4146 4147 /* determine the number and length of messages to receive for ij-structure */ 4148 /*-------------------------------------------------------------------------*/ 4149 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4150 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4151 4152 /* post the Irecv of j-structure */ 4153 /*-------------------------------*/ 4154 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4155 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4156 4157 /* post the Isend of j-structure */ 4158 /*--------------------------------*/ 4159 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4160 4161 for (proc=0, k=0; proc<size; proc++) { 4162 if (!len_s[proc]) continue; 4163 i = owners[proc]; 4164 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4165 k++; 4166 } 4167 4168 /* receives and sends of j-structure are complete */ 4169 /*------------------------------------------------*/ 4170 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4171 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4172 4173 /* send and recv i-structure */ 4174 /*---------------------------*/ 4175 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4176 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4177 4178 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4179 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4180 for (proc=0,k=0; proc<size; proc++) { 4181 if (!len_s[proc]) continue; 4182 /* form outgoing message for i-structure: 4183 buf_si[0]: nrows to be sent 4184 [1:nrows]: row index (global) 4185 [nrows+1:2*nrows+1]: i-structure index 4186 */ 4187 /*-------------------------------------------*/ 4188 nrows = len_si[proc]/2 - 1; 4189 buf_si_i = buf_si + nrows+1; 4190 buf_si[0] = nrows; 4191 buf_si_i[0] = 0; 4192 nrows = 0; 4193 for (i=owners[proc]; i<owners[proc+1]; i++) { 4194 anzi = ai[i+1] - ai[i]; 4195 if (anzi) { 4196 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4197 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4198 nrows++; 4199 } 4200 } 4201 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4202 k++; 4203 buf_si += len_si[proc]; 4204 } 4205 4206 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4207 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4208 4209 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4210 for (i=0; i<merge->nrecv; i++) { 4211 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4212 } 4213 4214 ierr = PetscFree(len_si);CHKERRQ(ierr); 4215 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4216 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4217 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4218 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4219 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4220 ierr = PetscFree(status);CHKERRQ(ierr); 4221 4222 /* compute a local seq matrix in each processor */ 4223 /*----------------------------------------------*/ 4224 /* allocate bi array and free space for accumulating nonzero column info */ 4225 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4226 bi[0] = 0; 4227 4228 /* create and initialize a linked list */ 4229 nlnk = N+1; 4230 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4231 4232 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4233 len = ai[owners[rank+1]] - 
ai[owners[rank]];
4234 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4235
4236 current_space = free_space;
4237
4238 /* determine symbolic info for each local row */
4239 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4240
4241 for (k=0; k<merge->nrecv; k++) {
4242 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4243 nrows = *buf_ri_k[k];
4244 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4245 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4246 }
4247
4248 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4249 len = 0;
4250 for (i=0; i<m; i++) {
4251 bnzi = 0;
4252 /* add local non-zero cols of this proc's seqmat into lnk */
4253 arow = owners[rank] + i;
4254 anzi = ai[arow+1] - ai[arow];
4255 aj = a->j + ai[arow];
4256 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4257 bnzi += nlnk;
4258 /* add received col data into lnk */
4259 for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4260 if (i == *nextrow[k]) { /* i-th row */
4261 anzi = *(nextai[k]+1) - *nextai[k];
4262 aj = buf_rj[k] + *nextai[k];
4263 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4264 bnzi += nlnk;
4265 nextrow[k]++; nextai[k]++;
4266 }
4267 }
4268 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4269
4270 /* if free space is not available, make more free space */
4271 if (current_space->local_remaining < bnzi) {
4272 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4273 nspacedouble++;
4274 }
4275 /* copy data into free space, then initialize lnk */
4276 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4277 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4278
4279 current_space->array += bnzi;
4280 current_space->local_used += bnzi;
4281 current_space->local_remaining -= bnzi;
4282
4283 bi[i+1] = bi[i] + bnzi;
4284 }
4285
4286 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4287
4288 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4289 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4290 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4291
4292 /* create symbolic parallel matrix B_mpi */
4293 /*---------------------------------------*/
4294 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4295 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4296 if (n==PETSC_DECIDE) {
4297 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4298 } else {
4299 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4300 }
4301 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4302 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4303 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4304 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4305 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4306
4307 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4308 B_mpi->assembled = PETSC_FALSE;
4309 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4310 merge->bi = bi;
4311 merge->bj = bj;
4312 merge->buf_ri = buf_ri;
4313 merge->buf_rj = buf_rj;
4314 merge->coi = NULL;
4315 merge->coj = NULL;
4316 merge->owners_co = NULL;
4317
4318 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4319
4320 /* attach the supporting struct to B_mpi for reuse
*/ 4321 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4322 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4323 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4324 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4325 *mpimat = B_mpi; 4326 4327 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4328 PetscFunctionReturn(0); 4329 } 4330 4331 #undef __FUNCT__ 4332 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4333 /*@C 4334 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4335 matrices from each processor 4336 4337 Collective on MPI_Comm 4338 4339 Input Parameters: 4340 + comm - the communicators the parallel matrix will live on 4341 . seqmat - the input sequential matrices 4342 . m - number of local rows (or PETSC_DECIDE) 4343 . n - number of local columns (or PETSC_DECIDE) 4344 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4345 4346 Output Parameter: 4347 . mpimat - the parallel matrix generated 4348 4349 Level: advanced 4350 4351 Notes: 4352 The dimensions of the sequential matrix in each processor MUST be the same. 4353 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4354 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4355 @*/ 4356 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4357 { 4358 PetscErrorCode ierr; 4359 PetscMPIInt size; 4360 4361 PetscFunctionBegin; 4362 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4363 if (size == 1) { 4364 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4365 if (scall == MAT_INITIAL_MATRIX) { 4366 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4367 } else { 4368 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4369 } 4370 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4371 PetscFunctionReturn(0); 4372 } 4373 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4374 if (scall == MAT_INITIAL_MATRIX) { 4375 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4376 } 4377 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4378 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4379 PetscFunctionReturn(0); 4380 } 4381 4382 #undef __FUNCT__ 4383 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4384 /*@ 4385 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4386 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4387 with MatGetSize() 4388 4389 Not Collective 4390 4391 Input Parameters: 4392 + A - the matrix 4393 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4394 4395 Output Parameter: 4396 . 
4382 #undef __FUNCT__
4383 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4384 /*@
4385 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4386 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4387 with MatGetSize()
4388
4389 Not Collective
4390
4391 Input Parameters:
4392 + A - the matrix
4393 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4394
4395 Output Parameter:
4396 . A_loc - the local sequential matrix generated
4397
4398 Level: developer
4399
4400 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4401
4402 @*/
4403 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4404 {
4405 PetscErrorCode ierr;
4406 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4407 Mat_SeqAIJ *mat,*a,*b;
4408 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4409 MatScalar *aa,*ba,*cam;
4410 PetscScalar *ca;
4411 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4412 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4413 PetscBool match;
4414 MPI_Comm comm;
4415 PetscMPIInt size;
4416
4417 PetscFunctionBegin;
4418 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4419 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4420 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4421 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4422 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4423
4424 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4425 a = (Mat_SeqAIJ*)(mpimat->A)->data;
4426 b = (Mat_SeqAIJ*)(mpimat->B)->data;
4427 ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4428 aa = a->a; ba = b->a;
4429 if (scall == MAT_INITIAL_MATRIX) {
4430 if (size == 1) {
4431 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4432 PetscFunctionReturn(0);
4433 }
4434
4435 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4436 ci[0] = 0;
4437 for (i=0; i<am; i++) {
4438 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4439 }
4440 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4441 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4442 k = 0;
4443 for (i=0; i<am; i++) {
4444 ncols_o = bi[i+1] - bi[i];
4445 ncols_d = ai[i+1] - ai[i];
4446 /* off-diagonal portion of A */
4447 for (jo=0; jo<ncols_o; jo++) {
4448 col = cmap[*bj];
4449 if (col >= cstart) break;
4450 cj[k] = col; bj++;
4451 ca[k++] = *ba++;
4452 }
4453 /* diagonal portion of A */
4454 for (j=0; j<ncols_d; j++) {
4455 cj[k] = cstart + *aj++;
4456 ca[k++] = *aa++;
4457 }
4458 /* off-diagonal portion of A */
4459 for (j=jo; j<ncols_o; j++) {
4460 cj[k] = cmap[*bj++];
4461 ca[k++] = *ba++;
4462 }
4463 }
4464 /* put together the new matrix */
4465 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4466 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4467 /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 4468 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4469 mat->free_a = PETSC_TRUE; 4470 mat->free_ij = PETSC_TRUE; 4471 mat->nonew = 0; 4472 } else if (scall == MAT_REUSE_MATRIX) { 4473 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4474 ci = mat->i; cj = mat->j; cam = mat->a; 4475 for (i=0; i<am; i++) { 4476 /* off-diagonal portion of A */ 4477 ncols_o = bi[i+1] - bi[i]; 4478 for (jo=0; jo<ncols_o; jo++) { 4479 col = cmap[*bj]; 4480 if (col >= cstart) break; 4481 *cam++ = *ba++; bj++; 4482 } 4483 /* diagonal portion of A */ 4484 ncols_d = ai[i+1] - ai[i]; 4485 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4486 /* off-diagonal portion of A */ 4487 for (j=jo; j<ncols_o; j++) { 4488 *cam++ = *ba++; bj++; 4489 } 4490 } 4491 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4492 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4493 PetscFunctionReturn(0); 4494 } 4495 4496 #undef __FUNCT__ 4497 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4498 /*@C 4499 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4500 4501 Not Collective 4502 4503 Input Parameters: 4504 + A - the matrix 4505 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4506 - row, col - index sets of rows and columns to extract (or NULL) 4507 4508 Output Parameter: 4509 . A_loc - the local sequential matrix generated 4510 4511 Level: developer 4512 4513 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4514 4515 @*/ 4516 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4517 { 4518 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4519 PetscErrorCode ierr; 4520 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4521 IS isrowa,iscola; 4522 Mat *aloc; 4523 PetscBool match; 4524 4525 PetscFunctionBegin; 4526 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4527 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4528 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4529 if (!row) { 4530 start = A->rmap->rstart; end = A->rmap->rend; 4531 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4532 } else { 4533 isrowa = *row; 4534 } 4535 if (!col) { 4536 start = A->cmap->rstart; 4537 cmap = a->garray; 4538 nzA = a->A->cmap->n; 4539 nzB = a->B->cmap->n; 4540 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4541 ncols = 0; 4542 for (i=0; i<nzB; i++) { 4543 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4544 else break; 4545 } 4546 imark = i; 4547 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4548 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4549 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4550 } else { 4551 iscola = *col; 4552 } 4553 if (scall != MAT_INITIAL_MATRIX) { 4554 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4555 aloc[0] = *A_loc; 4556 } 4557 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4558 *A_loc = aloc[0]; 4559 ierr = PetscFree(aloc);CHKERRQ(ierr); 4560 if (!row) { 4561 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4562 } 4563 if (!col) { 4564 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4565 } 4566 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4567 PetscFunctionReturn(0); 4568 } 4569 4570 #undef __FUNCT__ 4571 #define __FUNCT__ "MatGetBrowsOfAcols" 4572 /*@C 4573 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero 
columns of local A 4574 4575 Collective on Mat 4576 4577 Input Parameters: 4578 + A,B - the matrices in mpiaij format 4579 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4580 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4581 4582 Output Parameter: 4583 + rowb, colb - index sets of rows and columns of B to extract 4584 - B_seq - the sequential matrix generated 4585 4586 Level: developer 4587 4588 @*/ 4589 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4590 { 4591 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4592 PetscErrorCode ierr; 4593 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4594 IS isrowb,iscolb; 4595 Mat *bseq=NULL; 4596 4597 PetscFunctionBegin; 4598 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4599 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4600 } 4601 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4602 4603 if (scall == MAT_INITIAL_MATRIX) { 4604 start = A->cmap->rstart; 4605 cmap = a->garray; 4606 nzA = a->A->cmap->n; 4607 nzB = a->B->cmap->n; 4608 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4609 ncols = 0; 4610 for (i=0; i<nzB; i++) { /* row < local row index */ 4611 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4612 else break; 4613 } 4614 imark = i; 4615 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4616 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4617 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4618 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4619 } else { 4620 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4621 isrowb = *rowb; iscolb = *colb; 4622 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4623 bseq[0] = *B_seq; 4624 } 4625 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4626 *B_seq = bseq[0]; 4627 ierr = PetscFree(bseq);CHKERRQ(ierr); 4628 if (!rowb) { 4629 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4630 } else { 4631 *rowb = isrowb; 4632 } 4633 if (!colb) { 4634 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4635 } else { 4636 *colb = iscolb; 4637 } 4638 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4639 PetscFunctionReturn(0); 4640 } 4641 4642 #undef __FUNCT__ 4643 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4644 /* 4645 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4646 of the OFF-DIAGONAL portion of local A 4647 4648 Collective on Mat 4649 4650 Input Parameters: 4651 + A,B - the matrices in mpiaij format 4652 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4653 4654 Output Parameter: 4655 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4656 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4657 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4658 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4659 4660 Level: developer 4661 4662 */ 4663 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4664 { 4665 VecScatter_MPI_General *gen_to,*gen_from; 4666 PetscErrorCode ierr; 4667 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4668 Mat_SeqAIJ *b_oth; 4669 VecScatter ctx =a->Mvctx; 4670 MPI_Comm comm; 4671 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4672 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4673 PetscScalar *rvalues,*svalues; 4674 MatScalar *b_otha,*bufa,*bufA; 4675 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4676 MPI_Request *rwaits = NULL,*swaits = NULL; 4677 MPI_Status *sstatus,rstatus; 4678 PetscMPIInt jj,size; 4679 PetscInt *cols,sbs,rbs; 4680 PetscScalar *vals; 4681 4682 PetscFunctionBegin; 4683 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4684 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4685 4686 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4687 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4688 } 4689 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4690 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4691 4692 gen_to = (VecScatter_MPI_General*)ctx->todata; 4693 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4694 rvalues = gen_from->values; /* holds the length of receiving row */ 4695 svalues = gen_to->values; /* holds the length of sending row */ 4696 nrecvs = gen_from->n; 4697 nsends = gen_to->n; 4698 4699 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4700 srow = gen_to->indices; /* local row index to be sent */ 4701 sstarts = gen_to->starts; 4702 sprocs = gen_to->procs; 4703 sstatus = gen_to->sstatus; 4704 sbs = gen_to->bs; 4705 rstarts = gen_from->starts; 4706 rprocs = gen_from->procs; 4707 rbs = gen_from->bs; 4708 4709 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4710 if (scall == MAT_INITIAL_MATRIX) { 4711 /* i-array */ 4712 /*---------*/ 4713 /* post receives */ 4714 for (i=0; i<nrecvs; i++) { 4715 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4716 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4717 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4718 } 4719 4720 /* pack the outgoing message */ 4721 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4722 4723 sstartsj[0] = 0; 4724 rstartsj[0] = 0; 4725 len = 0; /* total length of j or a array to be sent */ 4726 k = 0; 4727 for (i=0; i<nsends; i++) { 4728 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4729 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4730 for (j=0; j<nrows; j++) { 4731 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4732 for (l=0; l<sbs; l++) { 4733 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4734 4735 rowlen[j*sbs+l] = ncols; 4736 4737 len += ncols; 4738 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4739 } 4740 k++; 4741 } 4742 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4743 4744 sstartsj[i+1] = len; /* starting point of 
(i+1)-th outgoing msg in bufj and bufa */
4745 }
4746 /* recvs and sends of i-array are completed */
4747 i = nrecvs;
4748 while (i--) {
4749 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4750 }
4751 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4752
4753 /* allocate buffers for sending j and a arrays */
4754 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4755 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4756
4757 /* create i-array of B_oth */
4758 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4759
4760 b_othi[0] = 0;
4761 len = 0; /* total length of j or a array to be received */
4762 k = 0;
4763 for (i=0; i<nrecvs; i++) {
4764 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4765 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4766 for (j=0; j<nrows; j++) {
4767 b_othi[k+1] = b_othi[k] + rowlen[j];
4768 len += rowlen[j]; k++;
4769 }
4770 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4771 }
4772
4773 /* allocate space for j and a arrays of B_oth */
4774 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4775 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4776
4777 /* j-array */
4778 /*---------*/
4779 /* post receives of j-array */
4780 for (i=0; i<nrecvs; i++) {
4781 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4782 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4783 }
4784
4785 /* pack the outgoing message j-array */
4786 k = 0;
4787 for (i=0; i<nsends; i++) {
4788 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4789 bufJ = bufj+sstartsj[i];
4790 for (j=0; j<nrows; j++) {
4791 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
4792 for (ll=0; ll<sbs; ll++) {
4793 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4794 for (l=0; l<ncols; l++) {
4795 *bufJ++ = cols[l];
4796 }
4797 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4798 }
4799 }
4800 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4801 }
4802
4803 /* recvs and sends of j-array are completed */
4804 i = nrecvs;
4805 while (i--) {
4806 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4807 }
4808 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4809 } else if (scall == MAT_REUSE_MATRIX) {
4810 sstartsj = *startsj_s;
4811 rstartsj = *startsj_r;
4812 bufa = *bufa_ptr;
4813 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
4814 b_otha = b_oth->a;
4815 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4816
4817 /* a-array */
4818 /*---------*/
4819 /* post receives of a-array */
4820 for (i=0; i<nrecvs; i++) {
4821 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4822 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4823 }
4824
4825 /* pack the outgoing message a-array */
4826 k = 0;
4827 for (i=0; i<nsends; i++) {
4828 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4829 bufA = bufa+sstartsj[i];
4830 for (j=0; j<nrows; j++) {
4831 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
4832 for (ll=0; ll<sbs; ll++) {
4833 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4834 for (l=0; l<ncols; l++) {
4835 *bufA++ = vals[l];
4836 }
4837 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4838 }
4839 }
4840 ierr =
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4841 } 4842 /* recvs and sends of a-array are completed */ 4843 i = nrecvs; 4844 while (i--) { 4845 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4846 } 4847 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4848 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4849 4850 if (scall == MAT_INITIAL_MATRIX) { 4851 /* put together the new matrix */ 4852 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4853 4854 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4855 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4856 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4857 b_oth->free_a = PETSC_TRUE; 4858 b_oth->free_ij = PETSC_TRUE; 4859 b_oth->nonew = 0; 4860 4861 ierr = PetscFree(bufj);CHKERRQ(ierr); 4862 if (!startsj_s || !bufa_ptr) { 4863 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4864 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4865 } else { 4866 *startsj_s = sstartsj; 4867 *startsj_r = rstartsj; 4868 *bufa_ptr = bufa; 4869 } 4870 } 4871 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4872 PetscFunctionReturn(0); 4873 } 4874 4875 #undef __FUNCT__ 4876 #define __FUNCT__ "MatGetCommunicationStructs" 4877 /*@C 4878 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4879 4880 Not Collective 4881 4882 Input Parameters: 4883 . A - The matrix in mpiaij format 4884 4885 Output Parameter: 4886 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4887 . colmap - A map from global column index to local index into lvec 4888 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4889 4890 Level: developer 4891 4892 @*/ 4893 #if defined(PETSC_USE_CTABLE) 4894 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4895 #else 4896 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4897 #endif 4898 { 4899 Mat_MPIAIJ *a; 4900 4901 PetscFunctionBegin; 4902 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4903 PetscValidPointer(lvec, 2); 4904 PetscValidPointer(colmap, 3); 4905 PetscValidPointer(multScatter, 4); 4906 a = (Mat_MPIAIJ*) A->data; 4907 if (lvec) *lvec = a->lvec; 4908 if (colmap) *colmap = a->colmap; 4909 if (multScatter) *multScatter = a->Mvctx; 4910 PetscFunctionReturn(0); 4911 } 4912 4913 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4914 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4915 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4916 #if defined(PETSC_HAVE_ELEMENTAL) 4917 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4918 #endif 4919 4920 #undef __FUNCT__ 4921 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4922 /* 4923 Computes (B'*A')' since computing B*A directly is untenable 4924 4925 n p p 4926 ( ) ( ) ( ) 4927 m ( A ) * n ( B ) = m ( C ) 4928 ( ) ( ) ( ) 4929 4930 */ 4931 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4932 { 4933 PetscErrorCode ierr; 4934 Mat At,Bt,Ct; 4935 4936 PetscFunctionBegin; 4937 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4938 ierr = 
MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4939 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4940 ierr = MatDestroy(&At);CHKERRQ(ierr); 4941 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4942 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4943 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4944 PetscFunctionReturn(0); 4945 } 4946 4947 #undef __FUNCT__ 4948 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4949 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4950 { 4951 PetscErrorCode ierr; 4952 PetscInt m=A->rmap->n,n=B->cmap->n; 4953 Mat Cmat; 4954 4955 PetscFunctionBegin; 4956 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4957 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4958 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4959 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4960 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4961 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4962 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4963 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4964 4965 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4966 4967 *C = Cmat; 4968 PetscFunctionReturn(0); 4969 } 4970 4971 /* ----------------------------------------------------------------*/ 4972 #undef __FUNCT__ 4973 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4974 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4975 { 4976 PetscErrorCode ierr; 4977 4978 PetscFunctionBegin; 4979 if (scall == MAT_INITIAL_MATRIX) { 4980 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4981 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4982 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4983 } 4984 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4985 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4986 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4987 PetscFunctionReturn(0); 4988 } 4989 4990 /*MC 4991 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4992 4993 Options Database Keys: 4994 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4995 4996 Level: beginner 4997 4998 .seealso: MatCreateAIJ() 4999 M*/ 5000 5001 #undef __FUNCT__ 5002 #define __FUNCT__ "MatCreate_MPIAIJ" 5003 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5004 { 5005 Mat_MPIAIJ *b; 5006 PetscErrorCode ierr; 5007 PetscMPIInt size; 5008 5009 PetscFunctionBegin; 5010 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5011 5012 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5013 B->data = (void*)b; 5014 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5015 B->assembled = PETSC_FALSE; 5016 B->insertmode = NOT_SET_VALUES; 5017 b->size = size; 5018 5019 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5020 5021 /* build cache for off array entries formed */ 5022 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5023 5024 b->donotstash = PETSC_FALSE; 5025 b->colmap = 0; 5026 b->garray = 0; 5027 b->roworiented = PETSC_TRUE; 5028 5029 /* stuff used for matrix vector multiply */ 5030 b->lvec = NULL; 5031 b->Mvctx = NULL; 5032 5033 /* stuff for MatGetRow() */ 5034 b->rowindices = 0; 5035 b->rowvalues = 0; 5036 b->getrowactive = PETSC_FALSE; 5037 5038 /* flexible pointer used in CUSP/CUSPARSE classes */ 5039 b->spptr = NULL; 5040 5041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5052 #if defined(PETSC_HAVE_ELEMENTAL) 5053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5054 #endif 5055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5058 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5059 PetscFunctionReturn(0); 5060 } 5061 5062 #undef __FUNCT__ 5063 #define __FUNCT__ 
"MatCreateMPIAIJWithSplitArrays" 5064 /*@C 5065 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5066 and "off-diagonal" part of the matrix in CSR format. 5067 5068 Collective on MPI_Comm 5069 5070 Input Parameters: 5071 + comm - MPI communicator 5072 . m - number of local rows (Cannot be PETSC_DECIDE) 5073 . n - This value should be the same as the local size used in creating the 5074 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5075 calculated if N is given) For square matrices n is almost always m. 5076 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5077 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5078 . i - row indices for "diagonal" portion of matrix 5079 . j - column indices 5080 . a - matrix values 5081 . oi - row indices for "off-diagonal" portion of matrix 5082 . oj - column indices 5083 - oa - matrix values 5084 5085 Output Parameter: 5086 . mat - the matrix 5087 5088 Level: advanced 5089 5090 Notes: 5091 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5092 must free the arrays once the matrix has been destroyed and not before. 5093 5094 The i and j indices are 0 based 5095 5096 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5097 5098 This sets local rows and cannot be used to set off-processor values. 5099 5100 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5101 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5102 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5103 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5104 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5105 communication if it is known that only local entries will be set. 
5106 5107 .keywords: matrix, aij, compressed row, sparse, parallel 5108 5109 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5110 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5111 @*/ 5112 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5113 { 5114 PetscErrorCode ierr; 5115 Mat_MPIAIJ *maij; 5116 5117 PetscFunctionBegin; 5118 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5119 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5120 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5121 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5122 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5123 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5124 maij = (Mat_MPIAIJ*) (*mat)->data; 5125 5126 (*mat)->preallocated = PETSC_TRUE; 5127 5128 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5129 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5130 5131 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5132 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5133 5134 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5135 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5136 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5137 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5138 5139 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5140 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5141 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5142 PetscFunctionReturn(0); 5143 } 5144 5145 /* 5146 Special version for direct calls from Fortran 5147 */ 5148 #include <petsc/private/fortranimpl.h> 5149 5150 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5151 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5152 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5153 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5154 #endif 5155 5156 /* Change these macros so can be used in void function */ 5157 #undef CHKERRQ 5158 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5159 #undef SETERRQ2 5160 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5161 #undef SETERRQ3 5162 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5163 #undef SETERRQ 5164 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5165 5166 #undef __FUNCT__ 5167 #define __FUNCT__ "matsetvaluesmpiaij_" 5168 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5169 { 5170 Mat mat = *mmat; 5171 PetscInt m = *mm, n = *mn; 5172 InsertMode addv = *maddv; 5173 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5174 PetscScalar value; 5175 PetscErrorCode ierr; 5176 5177 MatCheckPreallocated(mat,1); 5178 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5179 5180 #if defined(PETSC_USE_DEBUG) 5181 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5182 #endif 5183 { 5184 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5185 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5186 PetscBool roworiented = aij->roworiented; 5187 5188 /* Some Variables required in the macro */ 5189 Mat A = aij->A; 5190 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5191 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5192 MatScalar *aa = a->a; 5193 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5194 Mat B = aij->B; 5195 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5196 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5197 MatScalar *ba = b->a; 5198 5199 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5200 PetscInt nonew = a->nonew; 5201 MatScalar *ap1,*ap2; 5202 5203 PetscFunctionBegin; 5204 for (i=0; i<m; i++) { 5205 if (im[i] < 0) continue; 5206 #if defined(PETSC_USE_DEBUG) 5207 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5208 #endif 5209 if (im[i] >= rstart && im[i] < rend) { 5210 row = im[i] - rstart; 5211 lastcol1 = -1; 5212 rp1 = aj + ai[row]; 5213 ap1 = aa + ai[row]; 5214 rmax1 = aimax[row]; 5215 nrow1 = ailen[row]; 5216 low1 = 0; 5217 high1 = nrow1; 5218 lastcol2 = -1; 5219 rp2 = bj + bi[row]; 5220 ap2 = ba + bi[row]; 5221 rmax2 = bimax[row]; 5222 nrow2 = bilen[row]; 5223 low2 = 0; 5224 high2 = nrow2; 5225 5226 for (j=0; j<n; j++) { 5227 if (roworiented) value = v[i*n+j]; 5228 else value = v[i+j*m]; 5229 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5230 if (in[j] >= cstart && in[j] < cend) { 5231 col = in[j] - cstart; 5232 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5233 } else if (in[j] < 0) continue; 5234 #if defined(PETSC_USE_DEBUG) 5235 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5236 #endif 5237 else { 5238 if (mat->was_assembled) { 5239 if (!aij->colmap) { 5240 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5241 } 5242 #if defined(PETSC_USE_CTABLE) 5243 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5244 col--; 5245 #else 5246 col = aij->colmap[in[j]] - 1; 5247 #endif 5248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5249 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5250 col = in[j]; 5251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5252 B = aij->B; 5253 b = (Mat_SeqAIJ*)B->data; 5254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5255 rp2 = bj + bi[row]; 5256 ap2 = ba + bi[row]; 5257 rmax2 = bimax[row]; 5258 nrow2 = bilen[row]; 5259 low2 = 0; 5260 high2 = nrow2; 5261 bm = aij->B->rmap->n; 5262 ba = b->a; 5263 } 5264 } else col = in[j]; 5265 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5266 } 5267 } 5268 } else if (!aij->donotstash) { 5269 if (roworiented) { 5270 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5271 } else { 5272 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5273 } 5274 } 5275 } 5276 } 5277 PetscFunctionReturnVoid(); 5278 } 5279 5280
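/*
   Editor's sketch (not part of the original source): matsetvaluesmpiaij_() above is a Fortran-callable fast
   path with the same semantics a C application obtains from the generic interface shown below - values for
   locally owned rows go directly into the diagonal (A) and off-diagonal (B) SeqAIJ blocks, while rows owned
   by other processes are stashed and exchanged during assembly.

      PetscInt    row,col;          global indices chosen by the application; row need not be locally owned
      PetscScalar v = 1.0;
      ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/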