1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc/private/vecimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 8 /*MC 9 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 10 11 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 12 and MATMPIAIJ otherwise. As a result, for single process communicators, 13 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 14 for communicators controlling multiple processes. It is recommended that you call both of 15 the above preallocation routines for simplicity. 16 17 Options Database Keys: 18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 19 20 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 21 enough exist. 22 23 Level: beginner 24 25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 26 M*/ 27 28 /*MC 29 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 30 31 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 32 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 33 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 34 for communicators controlling multiple processes. It is recommended that you call both of 35 the above preallocation routines for simplicity. 36 37 Options Database Keys: 38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 39 40 Level: beginner 41 42 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 43 M*/ 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 52 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 53 const PetscInt *ia,*ib; 54 const MatScalar *aa,*bb; 55 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 56 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 57 58 PetscFunctionBegin; 59 *keptrows = 0; 60 ia = a->i; 61 ib = b->i; 62 for (i=0; i<m; i++) { 63 na = ia[i+1] - ia[i]; 64 nb = ib[i+1] - ib[i]; 65 if (!na && !nb) { 66 cnt++; 67 goto ok1; 68 } 69 aa = a->a + ia[i]; 70 for (j=0; j<na; j++) { 71 if (aa[j] != 0.0) goto ok1; 72 } 73 bb = b->a + ib[i]; 74 for (j=0; j <nb; j++) { 75 if (bb[j] != 0.0) goto ok1; 76 } 77 cnt++; 78 ok1:; 79 } 80 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 81 if (!n0rows) PetscFunctionReturn(0); 82 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 83 cnt = 0; 84 for (i=0; i<m; i++) { 85 na = ia[i+1] - ia[i]; 86 nb = ib[i+1] - ib[i]; 87 if (!na && !nb) continue; 88 aa = a->a + ia[i]; 89 for (j=0; j<na;j++) { 90 if (aa[j] != 0.0) { 91 rows[cnt++] = rstart + i; 92 goto ok2; 93 } 94 } 95 bb = b->a + ib[i]; 96 for (j=0; j<nb; j++) { 97 if (bb[j] != 0.0) { 98 rows[cnt++] = rstart + i; 99 goto ok2; 100 } 101 } 102 ok2:; 103 } 104 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ" 110 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 
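/*
   Complementing the MATAIJ manual pages above, a minimal caller-side sketch (illustrative only;
   n, A and d are placeholders declared by the caller) that creates an AIJ matrix with both
   recommended preallocation calls and then updates its diagonal, which with more than one
   process is handled by the routine below:

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
       ... MatSetValues() loop, MatAssemblyBegin()/MatAssemblyEnd() ...
       ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
       ierr = VecSet(d,2.0);CHKERRQ(ierr);
       ierr = MatDiagonalSet(A,d,ADD_VALUES);CHKERRQ(ierr);

   When the matrix is assembled and the row and column ownership ranges coincide (the usual
   square layout) the routine below touches only the local diagonal block; otherwise it falls
   back to MatDiagonalSet_Default().
*/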
111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 217 218 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 220 ierr = ISDestroy(&sis);CHKERRQ(ierr); 221 ierr = ISDestroy(&gis);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatDistribute_MPIAIJ" 227 /* 228 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 229 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 230 231 Only for square matrices 232 233 Used by a preconditioner, hence PETSC_EXTERN 234 */ 235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 236 { 237 PetscMPIInt rank,size; 238 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 239 PetscErrorCode ierr; 240 Mat mat; 241 Mat_SeqAIJ *gmata; 242 PetscMPIInt tag; 243 MPI_Status status; 244 PetscBool aij; 245 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 250 if (!rank) { 251 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 252 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 253 } 254 if (reuse == MAT_INITIAL_MATRIX) { 255 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 256 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 257 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 258 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 259 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 260 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 261 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 262 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 263 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 264 265 rowners[0] = 0; 266 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 267 rstart = rowners[rank]; 268 rend = rowners[rank+1]; 269 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 270 if (!rank) { 271 gmata = (Mat_SeqAIJ*) gmat->data; 272 /* send row lengths to all processors */ 273 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 274 for (i=1; i<size; i++) { 275 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276 } 277 /* determine number diagonal and off-diagonal counts */ 278 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 279 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 280 jj = 0; 281 for (i=0; i<m; i++) { 282 for (j=0; j<dlens[i]; j++) { 283 if (gmata->j[jj] < rstart) ld[i]++; 284 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 285 jj++; 286 } 287 } 288 /* send column indices to other processes */ 289 for (i=1; i<size; i++) { 290 nz = 
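/* number of nonzeros in the block of rows that process i will own */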
gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 291 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 292 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 } 294 295 /* send numerical values to other processes */ 296 for (i=1; i<size; i++) { 297 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 298 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 299 } 300 gmataa = gmata->a; 301 gmataj = gmata->j; 302 303 } else { 304 /* receive row lengths */ 305 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 306 /* receive column indices */ 307 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 309 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* determine number diagonal and off-diagonal counts */ 311 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 313 jj = 0; 314 for (i=0; i<m; i++) { 315 for (j=0; j<dlens[i]; j++) { 316 if (gmataj[jj] < rstart) ld[i]++; 317 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 318 jj++; 319 } 320 } 321 /* receive numerical values */ 322 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* set preallocation */ 326 for (i=0; i<m; i++) { 327 dlens[i] -= olens[i]; 328 } 329 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 330 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 dlens[i] += olens[i]; 334 } 335 cnt = 0; 336 for (i=0; i<m; i++) { 337 row = rstart + i; 338 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 339 cnt += dlens[i]; 340 } 341 if (rank) { 342 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 343 } 344 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 345 ierr = PetscFree(rowners);CHKERRQ(ierr); 346 347 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 348 349 *inmat = mat; 350 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 351 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 352 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 353 mat = *inmat; 354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 355 if (!rank) { 356 /* send numerical values to other processes */ 357 gmata = (Mat_SeqAIJ*) gmat->data; 358 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 359 gmataa = gmata->a; 360 for (i=1; i<size; i++) { 361 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 362 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 363 } 364 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 365 } else { 366 /* receive numerical values from process 0*/ 367 nz = Ad->nz + Ao->nz; 368 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 369 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 370 } 371 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 372 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 373 ad = Ad->a; 374 ao = Ao->a; 375 if (mat->rmap->n) { 376 i = 0; 377 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 378 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; 
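/* The received buffer stores each row as: entries left of the diagonal block, then the
      diagonal block, then entries right of it; ld[] holds the size of the left part, so the
      alternating copies into the off-diagonal values (ao) and diagonal values (ad) walk the
      buffer exactly once. */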
gmataa += nz; 379 } 380 for (i=1; i<mat->rmap->n; i++) { 381 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 i--; 385 if (mat->rmap->n) { 386 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 387 } 388 if (rank) { 389 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 390 } 391 } 392 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 393 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 /* 398 Local utility routine that creates a mapping from the global column 399 number to the local number in the off-diagonal part of the local 400 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 401 a slightly higher hash table cost; without it it is not scalable (each processor 402 has an order N integer array but is fast to acess. 403 */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; \ 454 } \ 455 rp1[_i] = col; \ 456 ap1[_i] = value; \ 457 A->nonzerostate++;\ 458 a_noinsert: ; \ 459 ailen[row] = nrow1; \ 460 } 461 462 463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 464 { \ 465 if (col <= lastcol2) low2 = 0; \ 466 else high2 = nrow2; \ 467 lastcol2 = col; \ 468 while (high2-low2 > 5) { \ 469 t = (low2+high2)/2; \ 470 if (rp2[t] > col) high2 = t; \ 471 else low2 = t; \ 472 } \ 473 for (_i=low2; _i<high2; _i++) { \ 
474 if (rp2[_i] > col) break; \ 475 if (rp2[_i] == col) { \ 476 if (addv == ADD_VALUES) ap2[_i] += value; \ 477 else ap2[_i] = value; \ 478 goto b_noinsert; \ 479 } \ 480 } \ 481 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 484 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 485 N = nrow2++ - 1; b->nz++; high2++; \ 486 /* shift up all the later entries in this row */ \ 487 for (ii=N; ii>=_i; ii--) { \ 488 rp2[ii+1] = rp2[ii]; \ 489 ap2[ii+1] = ap2[ii]; \ 490 } \ 491 rp2[_i] = col; \ 492 ap2[_i] = value; \ 493 B->nonzerostate++; \ 494 b_noinsert: ; \ 495 bilen[row] = nrow2; \ 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if (garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSetValues_MPIAIJ" 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 564 nrow1 = ailen[row]; 565 low1 = 0; 566 
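/* rp1/ap1 point at the column indices and values of this row of the diagonal block A and
         nrow1 is its current length; low1/high1 bracket the window searched by the insertion
         macro above. The analogous rp2/ap2 window for the off-diagonal block B is set up next. */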
high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (roworiented) value = v[i*n+j]; 577 else value = v[i+j*m]; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } else col = in[j]; 615 nonew = b->nonew; 616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!aij->donotstash) { 622 mat->assembled = PETSC_FALSE; 623 if (roworiented) { 624 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } else { 626 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } 628 } 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatGetValues_MPIAIJ" 636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 642 643 PetscFunctionBegin; 644 for (i=0; i<m; i++) { 645 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 646 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 647 if (idxm[i] >= rstart && idxm[i] < rend) { 648 row = idxm[i] - rstart; 649 for (j=0; j<n; j++) { 650 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 651 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 652 if (idxn[j] >= cstart && idxn[j] < cend) 
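/* the requested column lies in the diagonal block owned by this process */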
{ 653 col = idxn[j] - cstart; 654 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } else { 656 if (!aij->colmap) { 657 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 658 } 659 #if defined(PETSC_USE_CTABLE) 660 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 661 col--; 662 #else 663 col = aij->colmap[idxn[j]] - 1; 664 #endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. 
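(The MPIU_Allreduce() just below combines the local was_assembled flags with MPI_PROD, so the
      result is false as soon as any single process has disassembled; the processes that are still
      assembled then call MatDisAssemble_MPIAIJ() as well, keeping every process in the same state.)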
*/ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *owners = A->rmap->range; 787 PetscInt n = A->rmap->n; 788 PetscSF sf; 789 PetscInt *lrows; 790 PetscSFNode *rrows; 791 PetscInt r, p = 0, len = 0; 792 PetscErrorCode ierr; 793 794 PetscFunctionBegin; 795 /* Create SF where leaves are input rows and roots are owned rows */ 796 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 797 for (r = 0; r < n; ++r) lrows[r] = -1; 798 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 799 for (r = 0; r < N; ++r) { 800 const PetscInt idx = rows[r]; 801 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 802 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 803 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 804 } 805 if (A->nooffproczerorows) { 806 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 807 lrows[len++] = idx - owners[p]; 808 } else { 809 rrows[r].rank = p; 810 rrows[r].index = rows[r] - owners[p]; 811 } 812 } 813 if (!A->nooffproczerorows) { 814 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 815 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 816 /* Collect flags for rows to be zeroed */ 817 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 818 ierr = 
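/* completing the reduction marks every locally owned row that any process asked to zero:
          such rows now hold a nonnegative value in lrows[], untouched rows remain -1 and are
          dropped when lrows[] is compressed below */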
PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 819 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 820 /* Compress and put in row numbers */ 821 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 822 } 823 /* fix right hand side if needed */ 824 if (x && b) { 825 const PetscScalar *xx; 826 PetscScalar *bb; 827 828 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 829 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 830 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 831 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 833 } 834 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 835 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 836 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 837 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 838 } else if (diag != 0.0) { 839 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 840 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 841 for (r = 0; r < len; ++r) { 842 const PetscInt row = lrows[r] + A->rmap->rstart; 843 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 844 } 845 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 846 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 847 } else { 848 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 849 } 850 ierr = PetscFree(lrows);CHKERRQ(ierr); 851 852 /* only change matrix nonzero state if pattern was allowed to be changed */ 853 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 854 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 855 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 856 } 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 862 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 863 { 864 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 865 PetscErrorCode ierr; 866 PetscMPIInt n = A->rmap->n; 867 PetscInt i,j,r,m,p = 0,len = 0; 868 PetscInt *lrows,*owners = A->rmap->range; 869 PetscSFNode *rrows; 870 PetscSF sf; 871 const PetscScalar *xx; 872 PetscScalar *bb,*mask; 873 Vec xmask,lmask; 874 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 875 const PetscInt *aj, *ii,*ridx; 876 PetscScalar *aa; 877 878 PetscFunctionBegin; 879 /* Create SF where leaves are input rows and roots are owned rows */ 880 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 881 for (r = 0; r < n; ++r) lrows[r] = -1; 882 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 883 for (r = 0; r < N; ++r) { 884 const PetscInt idx = rows[r]; 885 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 886 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 887 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 888 } 889 rrows[r].rank = p; 890 rrows[r].index = rows[r] - owners[p]; 891 } 892 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 893 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, 
PETSC_OWN_POINTER);CHKERRQ(ierr); 894 /* Collect flags for rows to be zeroed */ 895 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 896 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 897 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 898 /* Compress and put in row numbers */ 899 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 900 /* zero diagonal part of matrix */ 901 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 902 /* handle off diagonal part of matrix */ 903 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 904 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 905 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 906 for (i=0; i<len; i++) bb[lrows[i]] = 1; 907 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 908 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 910 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 911 if (x) { 912 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 915 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 916 } 917 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 918 /* remove zeroed rows of off diagonal matrix */ 919 ii = aij->i; 920 for (i=0; i<len; i++) { 921 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 922 } 923 /* loop over all elements of off process part of matrix zeroing removed columns*/ 924 if (aij->compressedrow.use) { 925 m = aij->compressedrow.nrows; 926 ii = aij->compressedrow.i; 927 ridx = aij->compressedrow.rindex; 928 for (i=0; i<m; i++) { 929 n = ii[i+1] - ii[i]; 930 aj = aij->j + ii[i]; 931 aa = aij->a + ii[i]; 932 933 for (j=0; j<n; j++) { 934 if (PetscAbsScalar(mask[*aj])) { 935 if (b) bb[*ridx] -= *aa*xx[*aj]; 936 *aa = 0.0; 937 } 938 aa++; 939 aj++; 940 } 941 ridx++; 942 } 943 } else { /* do not use compressed row format */ 944 m = l->B->rmap->n; 945 for (i=0; i<m; i++) { 946 n = ii[i+1] - ii[i]; 947 aj = aij->j + ii[i]; 948 aa = aij->a + ii[i]; 949 for (j=0; j<n; j++) { 950 if (PetscAbsScalar(mask[*aj])) { 951 if (b) bb[i] -= *aa*xx[*aj]; 952 *aa = 0.0; 953 } 954 aa++; 955 aj++; 956 } 957 } 958 } 959 if (x) { 960 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 961 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 962 } 963 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 964 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 965 ierr = PetscFree(lrows);CHKERRQ(ierr); 966 967 /* only change matrix nonzero state if pattern was allowed to be changed */ 968 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 969 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 970 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 971 } 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatMult_MPIAIJ" 977 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 978 { 979 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 980 PetscErrorCode ierr; 981 PetscInt nt; 982 983 PetscFunctionBegin; 984 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 985 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 986 ierr = 
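/* overlap communication with computation: start gathering the off-process entries of xx into
         lvec, multiply by the diagonal block A while the messages are in flight, complete the
         scatter, then add the off-diagonal contribution B*lvec */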
VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 988 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 996 { 997 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 998 PetscErrorCode ierr; 999 1000 PetscFunctionBegin; 1001 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1007 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1008 { 1009 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1010 PetscErrorCode ierr; 1011 1012 PetscFunctionBegin; 1013 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1015 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1016 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1023 { 1024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1025 PetscErrorCode ierr; 1026 PetscBool merged; 1027 1028 PetscFunctionBegin; 1029 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1030 /* do nondiagonal part */ 1031 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1032 if (!merged) { 1033 /* send it on its way */ 1034 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1035 /* do local part */ 1036 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1037 /* receive remote parts: note this assumes the values are not actually */ 1038 /* added in yy until the next line, */ 1039 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1040 } else { 1041 /* do local part */ 1042 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1043 /* send it on its way */ 1044 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1045 /* values actually were received in the Begin() but we need to call this nop */ 1046 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1053 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1054 { 1055 MPI_Comm comm; 1056 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1057 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1058 IS Me,Notme; 1059 PetscErrorCode ierr; 1060 PetscInt M,N,first,last,*notme,i; 1061 PetscMPIInt size; 1062 1063 PetscFunctionBegin; 1064 /* Easy test: symmetric diagonal block */ 1065 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1066 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1067 if (!*f) PetscFunctionReturn(0); 1068 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1069 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1070 if (size == 1) PetscFunctionReturn(0); 1071 1072 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
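Each process extracts its owned rows of Amat restricted to the columns it does not own,
      A(me,notme), and the complementary block of Bmat (the rows it does not own restricted to its
      own columns, B(notme,me)); Amat is the transpose of Bmat to within tol exactly when the
      diagonal blocks pass the check above and these two off-diagonal blocks are transposes of
      each other.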
*/ 1073 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1074 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1075 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1076 for (i=0; i<first; i++) notme[i] = i; 1077 for (i=last; i<M; i++) notme[i-last+first] = i; 1078 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1079 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1080 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1081 Aoff = Aoffs[0]; 1082 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1083 Boff = Boffs[0]; 1084 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1085 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1086 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1087 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1088 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1089 ierr = PetscFree(notme);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1095 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1096 { 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 1100 PetscFunctionBegin; 1101 /* do nondiagonal part */ 1102 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1103 /* send it on its way */ 1104 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1105 /* do local part */ 1106 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1107 /* receive remote parts */ 1108 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 /* 1113 This only works correctly for square matrices where the subblock A->A is the 1114 diagonal block 1115 */ 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1119 { 1120 PetscErrorCode ierr; 1121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1122 1123 PetscFunctionBegin; 1124 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1125 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1126 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatScale_MPIAIJ" 1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1139 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "MatDestroy_MPIAIJ" 1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1146 { 1147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 #if defined(PETSC_USE_LOG) 1152 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1153 #endif 1154 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1155 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1156 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1157 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1158 #if defined(PETSC_USE_CTABLE) 1159 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1160 #else 1161 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1162 #endif 1163 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1164 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1165 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1166 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1167 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1168 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1169 1170 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1175 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1179 #if defined(PETSC_HAVE_ELEMENTAL) 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1181 #endif 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1188 { 1189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1190 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1191 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1192 PetscErrorCode ierr; 1193 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1194 int fd; 1195 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1196 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1197 PetscScalar *column_values; 1198 PetscInt message_count,flowcontrolcount; 1199 FILE *file; 1200 1201 PetscFunctionBegin; 1202 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1203 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1204 nz = A->nz + B->nz; 1205 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1206 if (!rank) { 1207 header[0] = MAT_FILE_CLASSID; 1208 header[1] = mat->rmap->N; 1209 header[2] = mat->cmap->N; 1210 1211 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1213 /* get largest number of rows any processor has */ 1214 rlen = mat->rmap->n; 1215 range = mat->rmap->range; 1216 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1217 } else { 1218 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 rlen = mat->rmap->n; 1220 } 1221 1222 /* load up the local row counts */ 1223 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1224 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1225 1226 /* store the row lengths to the file */ 1227 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1228 if (!rank) { 1229 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1230 for (i=1; i<size; i++) { 1231 ierr = 
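/* flow control: process 0 paces the other processes so only a bounded number of messages are
           outstanding, receiving each process's row lengths in turn and appending them, so the
           binary file holds one contiguous global row-length array without every process flooding
           process 0 at once */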
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1232 rlen = range[i+1] - range[i]; 1233 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1234 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1235 } 1236 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1237 } else { 1238 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1239 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1240 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1241 } 1242 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1243 1244 /* load up the local column indices */ 1245 nzmax = nz; /* th processor needs space a largest processor needs */ 1246 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1247 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1248 cnt = 0; 1249 for (i=0; i<mat->rmap->n; i++) { 1250 for (j=B->i[i]; j<B->i[i+1]; j++) { 1251 if ((col = garray[B->j[j]]) > cstart) break; 1252 column_indices[cnt++] = col; 1253 } 1254 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1255 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1256 } 1257 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1258 1259 /* store the column indices to the file */ 1260 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1261 if (!rank) { 1262 MPI_Status status; 1263 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 for (i=1; i<size; i++) { 1265 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1266 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1267 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1268 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1269 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1270 } 1271 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1272 } else { 1273 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1274 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1275 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1276 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1277 } 1278 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1279 1280 /* load up the local column values */ 1281 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1282 cnt = 0; 1283 for (i=0; i<mat->rmap->n; i++) { 1284 for (j=B->i[i]; j<B->i[i+1]; j++) { 1285 if (garray[B->j[j]] > cstart) break; 1286 column_values[cnt++] = B->a[j]; 1287 } 1288 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1289 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1290 } 1291 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1292 1293 /* store the column values to the file */ 1294 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1295 if (!rank) { 1296 MPI_Status status; 1297 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1298 for (i=1; i<size; i++) { 1299 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1300 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1301 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1302 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1303 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1304 } 1305 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1306 } else { 1307 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1308 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1309 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1310 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1311 } 1312 ierr = PetscFree(column_values);CHKERRQ(ierr); 1313 1314 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1315 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #include <petscdraw.h> 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1323 { 1324 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1325 PetscErrorCode ierr; 1326 PetscMPIInt rank = aij->rank,size = aij->size; 1327 PetscBool isdraw,iascii,isbinary; 1328 PetscViewer sviewer; 1329 PetscViewerFormat format; 1330 1331 PetscFunctionBegin; 1332 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1333 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1334 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1335 if (iascii) { 1336 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1337 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1338 MatInfo info; 1339 PetscBool inodes; 1340 1341 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1342 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1343 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1344 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1345 if (!inodes) { 1346 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1347 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1348 } else { 1349 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1350 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1351 } 1352 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1353 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1354 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1355 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1356 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1357 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1358 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1359 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1362 PetscInt inodecount,inodelimit,*inodes; 1363 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1364 if (inodes) { 1365 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1366 } else { 1367 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1368 } 1369 PetscFunctionReturn(0); 1370 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1371 PetscFunctionReturn(0); 1372 } 1373 } else if (isbinary) { 1374 if (size == 1) { 1375 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1376 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1377 } else { 1378 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } else if (isdraw) { 1382 PetscDraw draw; 1383 PetscBool isnull; 1384 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1385 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1386 if (isnull) PetscFunctionReturn(0); 1387 } 1388 1389 { 1390 /* assemble the entire matrix onto first processor. */ 1391 Mat A; 1392 Mat_SeqAIJ *Aloc; 1393 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1394 MatScalar *a; 1395 1396 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1397 if (!rank) { 1398 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1399 } else { 1400 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1401 } 1402 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1403 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1404 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1405 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1406 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1407 1408 /* copy over the A part */ 1409 Aloc = (Mat_SeqAIJ*)aij->A->data; 1410 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1411 row = mat->rmap->rstart; 1412 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1413 for (i=0; i<m; i++) { 1414 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1415 row++; 1416 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1417 } 1418 aj = Aloc->j; 1419 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1420 1421 /* copy over the B part */ 1422 Aloc = (Mat_SeqAIJ*)aij->B->data; 1423 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1424 row = mat->rmap->rstart; 1425 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1426 ct = cols; 1427 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1428 for (i=0; i<m; i++) { 1429 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1430 row++; 1431 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1432 } 1433 ierr = PetscFree(ct);CHKERRQ(ierr); 1434 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1435 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1436 /* 1437 
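The gathered copy A lives entirely on the first process (all other processes were given zero
         local rows above), but the viewing is still collective: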
Everyone has to call to draw the matrix since the graphics waits are 1438 synchronized across all processors that share the PetscDraw object 1439 */ 1440 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1441 if (!rank) { 1442 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1443 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1444 } 1445 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1446 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1447 ierr = MatDestroy(&A);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatView_MPIAIJ" 1454 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1455 { 1456 PetscErrorCode ierr; 1457 PetscBool iascii,isdraw,issocket,isbinary; 1458 1459 PetscFunctionBegin; 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1464 if (iascii || isdraw || isbinary || issocket) { 1465 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1466 } 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatSOR_MPIAIJ" 1472 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1473 { 1474 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1475 PetscErrorCode ierr; 1476 Vec bb1 = 0; 1477 PetscBool hasop; 1478 1479 PetscFunctionBegin; 1480 if (flag == SOR_APPLY_UPPER) { 1481 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1486 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1487 } 1488 1489 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1490 if (flag & SOR_ZERO_INITIAL_GUESS) { 1491 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1492 its--; 1493 } 1494 1495 while (its--) { 1496 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 1499 /* update rhs: bb1 = bb - B*x */ 1500 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1501 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1502 1503 /* local sweep */ 1504 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1505 } 1506 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1507 if (flag & SOR_ZERO_INITIAL_GUESS) { 1508 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1509 its--; 1510 } 1511 while (its--) { 1512 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1513 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 1515 /* update rhs: bb1 = bb - B*x */ 1516 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1517 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1518 1519 /* local sweep */ 1520 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1521 } 1522 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1523 if (flag & SOR_ZERO_INITIAL_GUESS) { 1524 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1525 its--; 1526 } 1527 while (its--) { 1528 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1529 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 1531 /* update rhs: bb1 = bb - B*x */ 1532 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1533 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1534 1535 /* local sweep */ 1536 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1537 } 1538 } else if (flag & SOR_EISENSTAT) { 1539 Vec xx1; 1540 1541 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1542 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1543 1544 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1546 if (!mat->diag) { 1547 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1548 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1549 } 1550 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1551 if (hasop) { 1552 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1553 } else { 1554 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1555 } 1556 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1557 1558 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1559 1560 /* local sweep */ 1561 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1562 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1563 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1564 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1565 1566 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1567 1568 matin->errortype = mat->A->errortype; 1569 PetscFunctionReturn(0); 1570 } 1571 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatPermute_MPIAIJ" 1574 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1575 { 1576 Mat aA,aB,Aperm; 1577 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1578 PetscScalar *aa,*ba; 1579 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1580 PetscSF rowsf,sf; 1581 IS parcolp = NULL; 1582 PetscBool done; 1583 PetscErrorCode ierr; 1584 1585 PetscFunctionBegin; 1586 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1587 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1588 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1589 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1590 1591 /* Invert row permutation to find out where my rows should go */ 1592 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1593 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1594 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1595 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1596 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1597 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1598 1599 /* 
Invert column permutation to find out where my columns should go */ 1600 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1601 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1602 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1603 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1604 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1605 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1606 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1607 1608 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1609 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1610 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1611 1612 /* Find out where my gcols should go */ 1613 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1614 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1615 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1616 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1617 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1618 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1619 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1620 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1621 1622 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1623 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1624 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1625 for (i=0; i<m; i++) { 1626 PetscInt row = rdest[i],rowner; 1627 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1628 for (j=ai[i]; j<ai[i+1]; j++) { 1629 PetscInt cowner,col = cdest[aj[j]]; 1630 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1631 if (rowner == cowner) dnnz[i]++; 1632 else onnz[i]++; 1633 } 1634 for (j=bi[i]; j<bi[i+1]; j++) { 1635 PetscInt cowner,col = gcdest[bj[j]]; 1636 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1637 if (rowner == cowner) dnnz[i]++; 1638 else onnz[i]++; 1639 } 1640 } 1641 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1642 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1643 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1644 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1645 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1646 1647 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1648 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1649 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1650 for (i=0; i<m; i++) { 1651 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1652 PetscInt j0,rowlen; 1653 rowlen = ai[i+1] - ai[i]; 1654 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1655 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1656 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1657 } 1658 rowlen = bi[i+1] - bi[i]; 1659 for (j0=j=0; j<rowlen; j0=j) { 1660 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1661 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1662 } 1663 } 1664 ierr = 
MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1665 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1666 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1667 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1668 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1669 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1670 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1671 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1672 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1673 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1674 *B = Aperm; 1675 PetscFunctionReturn(0); 1676 } 1677 1678 #undef __FUNCT__ 1679 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1680 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1681 { 1682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1683 PetscErrorCode ierr; 1684 1685 PetscFunctionBegin; 1686 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1687 if (ghosts) *ghosts = aij->garray; 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1693 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1694 { 1695 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1696 Mat A = mat->A,B = mat->B; 1697 PetscErrorCode ierr; 1698 PetscReal isend[5],irecv[5]; 1699 1700 PetscFunctionBegin; 1701 info->block_size = 1.0; 1702 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1703 1704 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1705 isend[3] = info->memory; isend[4] = info->mallocs; 1706 1707 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1708 1709 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1710 isend[3] += info->memory; isend[4] += info->mallocs; 1711 if (flag == MAT_LOCAL) { 1712 info->nz_used = isend[0]; 1713 info->nz_allocated = isend[1]; 1714 info->nz_unneeded = isend[2]; 1715 info->memory = isend[3]; 1716 info->mallocs = isend[4]; 1717 } else if (flag == MAT_GLOBAL_MAX) { 1718 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1719 1720 info->nz_used = irecv[0]; 1721 info->nz_allocated = irecv[1]; 1722 info->nz_unneeded = irecv[2]; 1723 info->memory = irecv[3]; 1724 info->mallocs = irecv[4]; 1725 } else if (flag == MAT_GLOBAL_SUM) { 1726 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1727 1728 info->nz_used = irecv[0]; 1729 info->nz_allocated = irecv[1]; 1730 info->nz_unneeded = irecv[2]; 1731 info->memory = irecv[3]; 1732 info->mallocs = irecv[4]; 1733 } 1734 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1735 info->fill_ratio_needed = 0; 1736 info->factor_mallocs = 0; 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatSetOption_MPIAIJ" 1742 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1743 { 1744 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1745 PetscErrorCode ierr; 1746 1747 PetscFunctionBegin; 1748 switch (op) { 1749 case MAT_NEW_NONZERO_LOCATIONS: 1750 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1751 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1752 case MAT_KEEP_NONZERO_PATTERN: 1753 case MAT_NEW_NONZERO_LOCATION_ERR: 1754 case MAT_USE_INODES: 1755 case MAT_IGNORE_ZERO_ENTRIES: 1756 MatCheckPreallocated(A,1); 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 ierr = 
MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1759 break; 1760 case MAT_ROW_ORIENTED: 1761 MatCheckPreallocated(A,1); 1762 a->roworiented = flg; 1763 1764 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1765 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1766 break; 1767 case MAT_NEW_DIAGONALS: 1768 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1769 break; 1770 case MAT_IGNORE_OFF_PROC_ENTRIES: 1771 a->donotstash = flg; 1772 break; 1773 case MAT_SPD: 1774 A->spd_set = PETSC_TRUE; 1775 A->spd = flg; 1776 if (flg) { 1777 A->symmetric = PETSC_TRUE; 1778 A->structurally_symmetric = PETSC_TRUE; 1779 A->symmetric_set = PETSC_TRUE; 1780 A->structurally_symmetric_set = PETSC_TRUE; 1781 } 1782 break; 1783 case MAT_SYMMETRIC: 1784 MatCheckPreallocated(A,1); 1785 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1786 break; 1787 case MAT_STRUCTURALLY_SYMMETRIC: 1788 MatCheckPreallocated(A,1); 1789 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1790 break; 1791 case MAT_HERMITIAN: 1792 MatCheckPreallocated(A,1); 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 break; 1795 case MAT_SYMMETRY_ETERNAL: 1796 MatCheckPreallocated(A,1); 1797 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1798 break; 1799 default: 1800 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1801 } 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatGetRow_MPIAIJ" 1807 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1808 { 1809 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1810 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1811 PetscErrorCode ierr; 1812 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1813 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1814 PetscInt *cmap,*idx_p; 1815 1816 PetscFunctionBegin; 1817 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1818 mat->getrowactive = PETSC_TRUE; 1819 1820 if (!mat->rowvalues && (idx || v)) { 1821 /* 1822 allocate enough space to hold information from the longest row. 
1823 */ 1824 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1825 PetscInt max = 1,tmp; 1826 for (i=0; i<matin->rmap->n; i++) { 1827 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1828 if (max < tmp) max = tmp; 1829 } 1830 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1831 } 1832 1833 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1834 lrow = row - rstart; 1835 1836 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1837 if (!v) {pvA = 0; pvB = 0;} 1838 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1839 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1840 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1841 nztot = nzA + nzB; 1842 1843 cmap = mat->garray; 1844 if (v || idx) { 1845 if (nztot) { 1846 /* Sort by increasing column numbers, assuming A and B already sorted */ 1847 PetscInt imark = -1; 1848 if (v) { 1849 *v = v_p = mat->rowvalues; 1850 for (i=0; i<nzB; i++) { 1851 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1852 else break; 1853 } 1854 imark = i; 1855 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1856 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1857 } 1858 if (idx) { 1859 *idx = idx_p = mat->rowindices; 1860 if (imark > -1) { 1861 for (i=0; i<imark; i++) { 1862 idx_p[i] = cmap[cworkB[i]]; 1863 } 1864 } else { 1865 for (i=0; i<nzB; i++) { 1866 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1867 else break; 1868 } 1869 imark = i; 1870 } 1871 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1872 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1873 } 1874 } else { 1875 if (idx) *idx = 0; 1876 if (v) *v = 0; 1877 } 1878 } 1879 *nz = nztot; 1880 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1881 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1887 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1888 { 1889 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1890 1891 PetscFunctionBegin; 1892 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1893 aij->getrowactive = PETSC_FALSE; 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatNorm_MPIAIJ" 1899 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1900 { 1901 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1902 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1903 PetscErrorCode ierr; 1904 PetscInt i,j,cstart = mat->cmap->rstart; 1905 PetscReal sum = 0.0; 1906 MatScalar *v; 1907 1908 PetscFunctionBegin; 1909 if (aij->size == 1) { 1910 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1911 } else { 1912 if (type == NORM_FROBENIUS) { 1913 v = amat->a; 1914 for (i=0; i<amat->nz; i++) { 1915 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1916 } 1917 v = bmat->a; 1918 for (i=0; i<bmat->nz; i++) { 1919 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1920 } 1921 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1922 *norm = PetscSqrtReal(*norm); 1923 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1924 } else if (type == NORM_1) { /* max column norm */ 1925 PetscReal *tmp,*tmp2; 1926 PetscInt *jj,*garray = aij->garray; 1927 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1928 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1929 *norm = 0.0; 1930 v = amat->a; jj = amat->j; 1931 for (j=0; j<amat->nz; j++) { 1932 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1933 } 1934 v = bmat->a; jj = bmat->j; 1935 for (j=0; j<bmat->nz; j++) { 1936 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1937 } 1938 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1939 for (j=0; j<mat->cmap->N; j++) { 1940 if (tmp2[j] > *norm) *norm = tmp2[j]; 1941 } 1942 ierr = PetscFree(tmp);CHKERRQ(ierr); 1943 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1944 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1945 } else if (type == NORM_INFINITY) { /* max row norm */ 1946 PetscReal ntemp = 0.0; 1947 for (j=0; j<aij->A->rmap->n; j++) { 1948 v = amat->a + amat->i[j]; 1949 sum = 0.0; 1950 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1951 sum += PetscAbsScalar(*v); v++; 1952 } 1953 v = bmat->a + bmat->i[j]; 1954 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1955 sum += PetscAbsScalar(*v); v++; 1956 } 1957 if (sum > ntemp) ntemp = sum; 1958 } 1959 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1960 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1961 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1962 } 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatTranspose_MPIAIJ" 1968 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1969 { 1970 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1971 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1972 PetscErrorCode ierr; 1973 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1974 PetscInt cstart = A->cmap->rstart,ncol; 1975 Mat B; 1976 MatScalar *array; 1977 1978 PetscFunctionBegin; 1979 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1980 1981 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1982 ai = Aloc->i; aj = Aloc->j; 1983 bi = Bloc->i; bj = Bloc->j; 1984 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1985 PetscInt *d_nnz,*g_nnz,*o_nnz; 1986 PetscSFNode *oloc; 1987 PETSC_UNUSED PetscSF sf; 1988 1989 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1990 /* compute d_nnz for preallocation */ 1991 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1992 for (i=0; i<ai[ma]; i++) { 1993 d_nnz[aj[i]]++; 1994 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1995 } 1996 /* compute local off-diagonal contributions */ 1997 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1998 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1999 /* map those to global */ 2000 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2001 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2002 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2003 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2004 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2005 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2006 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2007 2008 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2009 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2010 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2011 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2012 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2013 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2014 } else { 2015 B = *matout; 2016 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2017 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2018 } 2019 2020 /* copy over the A part */ 2021 array = Aloc->a; 2022 row = A->rmap->rstart; 2023 for (i=0; i<ma; i++) { 2024 ncol = ai[i+1]-ai[i]; 2025 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2026 row++; 2027 array += ncol; aj += ncol; 2028 } 2029 aj = Aloc->j; 2030 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2031 2032 /* copy over the B part */ 2033 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2034 array = Bloc->a; 2035 row = A->rmap->rstart; 2036 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2037 cols_tmp = cols; 2038 for (i=0; i<mb; i++) { 2039 ncol = bi[i+1]-bi[i]; 2040 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2041 row++; 2042 array += ncol; cols_tmp += ncol; 2043 } 2044 ierr = PetscFree(cols);CHKERRQ(ierr); 2045 2046 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2047 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2048 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2049 *matout = B; 2050 } else { 2051 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2052 } 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2058 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2059 { 2060 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2061 Mat a = aij->A,b = aij->B; 2062 PetscErrorCode ierr; 2063 PetscInt s1,s2,s3; 2064 2065 PetscFunctionBegin; 2066 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2067 if (rr) { 2068 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2069 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2070 /* Overlap communication with computation. 
*/ 2071 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 } 2073 if (ll) { 2074 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2075 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2076 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2077 } 2078 /* scale the diagonal block */ 2079 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2080 2081 if (rr) { 2082 /* Do a scatter end and then right scale the off-diagonal block */ 2083 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2091 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2092 { 2093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2094 PetscErrorCode ierr; 2095 2096 PetscFunctionBegin; 2097 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2098 PetscFunctionReturn(0); 2099 } 2100 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "MatEqual_MPIAIJ" 2103 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2104 { 2105 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2106 Mat a,b,c,d; 2107 PetscBool flg; 2108 PetscErrorCode ierr; 2109 2110 PetscFunctionBegin; 2111 a = matA->A; b = matA->B; 2112 c = matB->A; d = matB->B; 2113 2114 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2115 if (flg) { 2116 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2117 } 2118 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatCopy_MPIAIJ" 2124 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2125 { 2126 PetscErrorCode ierr; 2127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2128 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2129 2130 PetscFunctionBegin; 2131 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2132 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2133 /* because of the column compression in the off-processor part of the matrix a->B, 2134 the number of columns in a->B and b->B may be different, hence we cannot call 2135 the MatCopy() directly on the two parts. If need be, we can provide a more 2136 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2137 then copying the submatrices */ 2138 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2139 } else { 2140 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2141 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2142 } 2143 PetscFunctionReturn(0); 2144 } 2145 2146 #undef __FUNCT__ 2147 #define __FUNCT__ "MatSetUp_MPIAIJ" 2148 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2149 { 2150 PetscErrorCode ierr; 2151 2152 PetscFunctionBegin; 2153 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2154 PetscFunctionReturn(0); 2155 } 2156 2157 /* 2158 Computes the number of nonzeros per row needed for preallocation when X and Y 2159 have different nonzero structure. 
2160 */ 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2163 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2164 { 2165 PetscInt i,j,k,nzx,nzy; 2166 2167 PetscFunctionBegin; 2168 /* Set the number of nonzeros in the new matrix */ 2169 for (i=0; i<m; i++) { 2170 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2171 nzx = xi[i+1] - xi[i]; 2172 nzy = yi[i+1] - yi[i]; 2173 nnz[i] = 0; 2174 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2175 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2176 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2177 nnz[i]++; 2178 } 2179 for (; k<nzy; k++) nnz[i]++; 2180 } 2181 PetscFunctionReturn(0); 2182 } 2183 2184 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2187 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2188 { 2189 PetscErrorCode ierr; 2190 PetscInt m = Y->rmap->N; 2191 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2192 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2193 2194 PetscFunctionBegin; 2195 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatAXPY_MPIAIJ" 2201 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2202 { 2203 PetscErrorCode ierr; 2204 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2205 PetscBLASInt bnz,one=1; 2206 Mat_SeqAIJ *x,*y; 2207 2208 PetscFunctionBegin; 2209 if (str == SAME_NONZERO_PATTERN) { 2210 PetscScalar alpha = a; 2211 x = (Mat_SeqAIJ*)xx->A->data; 2212 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2213 y = (Mat_SeqAIJ*)yy->A->data; 2214 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2215 x = (Mat_SeqAIJ*)xx->B->data; 2216 y = (Mat_SeqAIJ*)yy->B->data; 2217 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2218 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2219 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2220 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2221 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2222 } else { 2223 Mat B; 2224 PetscInt *nnz_d,*nnz_o; 2225 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2226 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2227 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2228 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2229 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2230 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2231 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2232 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2233 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2234 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2235 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2236 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2237 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2238 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2239 } 2240 
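  /*
     A minimal usage sketch (an illustration only, assuming Y and X are assembled MATMPIAIJ
     matrices of conforming sizes; it is not part of this routine): callers reach this code
     through the public MatAXPY() interface, e.g.

         ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

     With SAME_NONZERO_PATTERN the update above reduces to two BLAS axpy calls on the stored
     values of the diagonal and off-diagonal blocks; with SUBSET_NONZERO_PATTERN it falls back
     to MatAXPY_Basic(); for any other structure a new matrix with the merged preallocation
     computed by MatAXPYGetPreallocation_MPIAIJ() replaces Y.
  */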
PetscFunctionReturn(0); 2241 } 2242 2243 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "MatConjugate_MPIAIJ" 2247 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2248 { 2249 #if defined(PETSC_USE_COMPLEX) 2250 PetscErrorCode ierr; 2251 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2252 2253 PetscFunctionBegin; 2254 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2255 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2256 #else 2257 PetscFunctionBegin; 2258 #endif 2259 PetscFunctionReturn(0); 2260 } 2261 2262 #undef __FUNCT__ 2263 #define __FUNCT__ "MatRealPart_MPIAIJ" 2264 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2265 { 2266 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2267 PetscErrorCode ierr; 2268 2269 PetscFunctionBegin; 2270 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2271 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2272 PetscFunctionReturn(0); 2273 } 2274 2275 #undef __FUNCT__ 2276 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2277 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2278 { 2279 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2280 PetscErrorCode ierr; 2281 2282 PetscFunctionBegin; 2283 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2284 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2285 PetscFunctionReturn(0); 2286 } 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2290 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2291 { 2292 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2293 PetscErrorCode ierr; 2294 PetscInt i,*idxb = 0; 2295 PetscScalar *va,*vb; 2296 Vec vtmp; 2297 2298 PetscFunctionBegin; 2299 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2300 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2301 if (idx) { 2302 for (i=0; i<A->rmap->n; i++) { 2303 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2304 } 2305 } 2306 2307 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2308 if (idx) { 2309 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2310 } 2311 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2312 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2313 2314 for (i=0; i<A->rmap->n; i++) { 2315 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2316 va[i] = vb[i]; 2317 if (idx) idx[i] = a->garray[idxb[i]]; 2318 } 2319 } 2320 2321 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2322 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2323 ierr = PetscFree(idxb);CHKERRQ(ierr); 2324 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2325 PetscFunctionReturn(0); 2326 } 2327 2328 #undef __FUNCT__ 2329 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2330 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2331 { 2332 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2333 PetscErrorCode ierr; 2334 PetscInt i,*idxb = 0; 2335 PetscScalar *va,*vb; 2336 Vec vtmp; 2337 2338 PetscFunctionBegin; 2339 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2340 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2341 if (idx) { 2342 for (i=0; i<A->cmap->n; i++) { 2343 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2344 } 2345 } 2346 2347 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2348 if (idx) { 2349 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2350 } 2351 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2352 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2353 2354 for (i=0; i<A->rmap->n; i++) { 2355 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2356 va[i] = vb[i]; 2357 if (idx) idx[i] = a->garray[idxb[i]]; 2358 } 2359 } 2360 2361 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2362 ierr = 
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2363 ierr = PetscFree(idxb);CHKERRQ(ierr); 2364 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2370 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2371 { 2372 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2373 PetscInt n = A->rmap->n; 2374 PetscInt cstart = A->cmap->rstart; 2375 PetscInt *cmap = mat->garray; 2376 PetscInt *diagIdx, *offdiagIdx; 2377 Vec diagV, offdiagV; 2378 PetscScalar *a, *diagA, *offdiagA; 2379 PetscInt r; 2380 PetscErrorCode ierr; 2381 2382 PetscFunctionBegin; 2383 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2384 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2385 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2386 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2387 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2388 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2389 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2390 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2391 for (r = 0; r < n; ++r) { 2392 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2393 a[r] = diagA[r]; 2394 idx[r] = cstart + diagIdx[r]; 2395 } else { 2396 a[r] = offdiagA[r]; 2397 idx[r] = cmap[offdiagIdx[r]]; 2398 } 2399 } 2400 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2401 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2402 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2403 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2404 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2405 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2411 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2412 { 2413 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2414 PetscInt n = A->rmap->n; 2415 PetscInt cstart = A->cmap->rstart; 2416 PetscInt *cmap = mat->garray; 2417 PetscInt *diagIdx, *offdiagIdx; 2418 Vec diagV, offdiagV; 2419 PetscScalar *a, *diagA, *offdiagA; 2420 PetscInt r; 2421 PetscErrorCode ierr; 2422 2423 PetscFunctionBegin; 2424 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2425 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2426 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2427 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2428 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2429 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2430 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2431 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2432 for (r = 0; r < n; ++r) { 2433 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2434 a[r] = diagA[r]; 2435 idx[r] = cstart + diagIdx[r]; 2436 } else { 2437 a[r] = offdiagA[r]; 2438 idx[r] = cmap[offdiagIdx[r]]; 2439 } 2440 } 2441 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2442 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2443 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2444 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2445 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2446 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 #undef __FUNCT__ 2451 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2452 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2453 { 2454 PetscErrorCode ierr; 
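  /* Note on the approach below (a descriptive comment, not a behavioral change):
     MatGetSubMatrix_MPIAIJ_All() called with MAT_DO_NOT_GET_VALUES gathers only the nonzero
     structure of the whole matrix into a sequential matrix, returned as a one-entry array;
     that matrix is kept as the result and only the array wrapper is freed. */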
2455 Mat *dummy; 2456 2457 PetscFunctionBegin; 2458 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2459 *newmat = *dummy; 2460 ierr = PetscFree(dummy);CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2466 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2467 { 2468 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2469 PetscErrorCode ierr; 2470 2471 PetscFunctionBegin; 2472 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2473 A->errortype = a->A->errortype; 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #undef __FUNCT__ 2478 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2479 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2480 { 2481 PetscErrorCode ierr; 2482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2483 2484 PetscFunctionBegin; 2485 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2486 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2487 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2494 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2495 { 2496 PetscFunctionBegin; 2497 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2498 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2499 PetscFunctionReturn(0); 2500 } 2501 2502 #undef __FUNCT__ 2503 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2504 /*@ 2505 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2506 2507 Collective on Mat 2508 2509 Input Parameters: 2510 + A - the matrix 2511 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2512 2513 Level: advanced 2514 2515 @*/ 2516 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2517 { 2518 PetscErrorCode ierr; 2519 2520 PetscFunctionBegin; 2521 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2522 PetscFunctionReturn(0); 2523 } 2524 2525 #undef __FUNCT__ 2526 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2527 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2528 { 2529 PetscErrorCode ierr; 2530 PetscBool sc = PETSC_FALSE,flg; 2531 2532 PetscFunctionBegin; 2533 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2534 ierr = PetscObjectOptionsBegin((PetscObject)A); 2535 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2536 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2537 if (flg) { 2538 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2539 } 2540 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2541 PetscFunctionReturn(0); 2542 } 2543 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatShift_MPIAIJ" 2546 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2547 { 2548 PetscErrorCode ierr; 2549 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2550 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2551 2552 PetscFunctionBegin; 2553 if (!Y->preallocated) { 2554 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2555 } else if (!aij->nz) { 2556 
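    /* The diagonal block is preallocated but holds no entries yet, so give it room for one
       entry per row before shifting; the preallocation call can reset the nonew flag, so it
       is saved and restored to preserve whatever new-nonzero error behavior the user selected. */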
PetscInt nonew = aij->nonew; 2557 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2558 aij->nonew = nonew; 2559 } 2560 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2561 PetscFunctionReturn(0); 2562 } 2563 2564 #undef __FUNCT__ 2565 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2566 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2567 { 2568 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2569 PetscErrorCode ierr; 2570 2571 PetscFunctionBegin; 2572 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2573 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2574 if (d) { 2575 PetscInt rstart; 2576 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2577 *d += rstart; 2578 2579 } 2580 PetscFunctionReturn(0); 2581 } 2582 2583 2584 /* -------------------------------------------------------------------*/ 2585 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2586 MatGetRow_MPIAIJ, 2587 MatRestoreRow_MPIAIJ, 2588 MatMult_MPIAIJ, 2589 /* 4*/ MatMultAdd_MPIAIJ, 2590 MatMultTranspose_MPIAIJ, 2591 MatMultTransposeAdd_MPIAIJ, 2592 0, 2593 0, 2594 0, 2595 /*10*/ 0, 2596 0, 2597 0, 2598 MatSOR_MPIAIJ, 2599 MatTranspose_MPIAIJ, 2600 /*15*/ MatGetInfo_MPIAIJ, 2601 MatEqual_MPIAIJ, 2602 MatGetDiagonal_MPIAIJ, 2603 MatDiagonalScale_MPIAIJ, 2604 MatNorm_MPIAIJ, 2605 /*20*/ MatAssemblyBegin_MPIAIJ, 2606 MatAssemblyEnd_MPIAIJ, 2607 MatSetOption_MPIAIJ, 2608 MatZeroEntries_MPIAIJ, 2609 /*24*/ MatZeroRows_MPIAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*29*/ MatSetUp_MPIAIJ, 2615 0, 2616 0, 2617 0, 2618 0, 2619 /*34*/ MatDuplicate_MPIAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*39*/ MatAXPY_MPIAIJ, 2625 MatGetSubMatrices_MPIAIJ, 2626 MatIncreaseOverlap_MPIAIJ, 2627 MatGetValues_MPIAIJ, 2628 MatCopy_MPIAIJ, 2629 /*44*/ MatGetRowMax_MPIAIJ, 2630 MatScale_MPIAIJ, 2631 MatShift_MPIAIJ, 2632 MatDiagonalSet_MPIAIJ, 2633 MatZeroRowsColumns_MPIAIJ, 2634 /*49*/ MatSetRandom_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*54*/ MatFDColoringCreate_MPIXAIJ, 2640 0, 2641 MatSetUnfactored_MPIAIJ, 2642 MatPermute_MPIAIJ, 2643 0, 2644 /*59*/ MatGetSubMatrix_MPIAIJ, 2645 MatDestroy_MPIAIJ, 2646 MatView_MPIAIJ, 2647 0, 2648 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2649 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2650 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2651 0, 2652 0, 2653 0, 2654 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2655 MatGetRowMinAbs_MPIAIJ, 2656 0, 2657 MatSetColoring_MPIAIJ, 2658 0, 2659 MatSetValuesAdifor_MPIAIJ, 2660 /*75*/ MatFDColoringApply_AIJ, 2661 MatSetFromOptions_MPIAIJ, 2662 0, 2663 0, 2664 MatFindZeroDiagonals_MPIAIJ, 2665 /*80*/ 0, 2666 0, 2667 0, 2668 /*83*/ MatLoad_MPIAIJ, 2669 0, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2675 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2676 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2677 MatPtAP_MPIAIJ_MPIAIJ, 2678 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2679 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*99*/ 0, 2685 0, 2686 0, 2687 MatConjugate_MPIAIJ, 2688 0, 2689 /*104*/MatSetValuesRow_MPIAIJ, 2690 MatRealPart_MPIAIJ, 2691 MatImaginaryPart_MPIAIJ, 2692 0, 2693 0, 2694 /*109*/0, 2695 0, 2696 MatGetRowMin_MPIAIJ, 2697 0, 2698 MatMissingDiagonal_MPIAIJ, 2699 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2700 0, 2701 MatGetGhosts_MPIAIJ, 2702 0, 2703 0, 2704 /*119*/0, 2705 0, 2706 0, 2707 0, 2708 MatGetMultiProcBlock_MPIAIJ, 2709 /*124*/MatFindNonzeroRows_MPIAIJ, 2710 MatGetColumnNorms_MPIAIJ, 2711 MatInvertBlockDiagonal_MPIAIJ, 2712 0, 2713 
MatGetSubMatricesMPI_MPIAIJ, 2714 /*129*/0, 2715 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2716 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2717 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2718 0, 2719 /*134*/0, 2720 0, 2721 0, 2722 0, 2723 0, 2724 /*139*/0, 2725 0, 2726 0, 2727 MatFDColoringSetUp_MPIXAIJ, 2728 MatFindOffBlockDiagonalEntries_MPIAIJ, 2729 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2730 }; 2731 2732 /* ----------------------------------------------------------------------------------------*/ 2733 2734 #undef __FUNCT__ 2735 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2736 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2737 { 2738 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2739 PetscErrorCode ierr; 2740 2741 PetscFunctionBegin; 2742 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2743 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2744 PetscFunctionReturn(0); 2745 } 2746 2747 #undef __FUNCT__ 2748 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2749 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2750 { 2751 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2752 PetscErrorCode ierr; 2753 2754 PetscFunctionBegin; 2755 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2756 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2757 PetscFunctionReturn(0); 2758 } 2759 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2762 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2763 { 2764 Mat_MPIAIJ *b; 2765 PetscErrorCode ierr; 2766 2767 PetscFunctionBegin; 2768 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2769 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2770 b = (Mat_MPIAIJ*)B->data; 2771 2772 if (!B->preallocated) { 2773 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 2774 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2775 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2776 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2777 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2778 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2779 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2780 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2781 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2782 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2783 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2784 } 2785 2786 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2787 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2788 B->preallocated = PETSC_TRUE; 2789 PetscFunctionReturn(0); 2790 } 2791 2792 #undef __FUNCT__ 2793 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2794 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2795 { 2796 Mat mat; 2797 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2798 PetscErrorCode ierr; 2799 2800 PetscFunctionBegin; 2801 *newmat = 0; 2802 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2803 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2804 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2805 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2806 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2807 a = (Mat_MPIAIJ*)mat->data; 2808 2809 mat->factortype = matin->factortype; 2810 mat->assembled = PETSC_TRUE; 2811 mat->insertmode = NOT_SET_VALUES; 2812 mat->preallocated = PETSC_TRUE; 2813 2814 a->size = oldmat->size; 2815 a->rank = oldmat->rank; 2816 a->donotstash = oldmat->donotstash; 2817 a->roworiented = oldmat->roworiented; 2818 a->rowindices = 0; 2819 a->rowvalues = 0; 2820 a->getrowactive = PETSC_FALSE; 2821 2822 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2823 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2824 2825 if (oldmat->colmap) { 2826 #if defined(PETSC_USE_CTABLE) 2827 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2828 #else 2829 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2830 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2831 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2832 #endif 2833 } else a->colmap = 0; 2834 if (oldmat->garray) { 2835 PetscInt len; 2836 len = oldmat->B->cmap->n; 2837 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2838 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2839 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2840 } else a->garray = 0; 2841 2842 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2843 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2844 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2845 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2846 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2847 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2848 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2849 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2850 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2851 *newmat = mat; 2852 PetscFunctionReturn(0); 2853 } 2854 2855 2856 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatLoad_MPIAIJ" 2859 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2860 { 2861 PetscScalar *vals,*svals; 2862 MPI_Comm comm; 2863 PetscErrorCode ierr; 2864 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2865 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2866 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2867 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2868 PetscInt cend,cstart,n,*rowners; 2869 int fd; 2870 PetscInt bs = newMat->rmap->bs; 2871 2872 PetscFunctionBegin; 2873 /* force binary viewer to load .info file if it has not yet done so */ 2874 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2875 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2876 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2878 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2879 if (!rank) { 2880 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2881 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2882 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MPIAIJ"); 2883 } 2884 2885 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 2886 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2887 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2888 if (bs < 0) bs = 1; 2889 2890 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2891 M = header[1]; N = header[2]; 2892 2893 /* If global sizes are set, check if they are consistent with that given in the file */ 2894 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2895 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2896 2897 /* determine ownership of all (block) rows */ 2898 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2899 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2900 else m = newMat->rmap->n; /* Set by user */ 2901 2902 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2903 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2904 2905 /* First process needs enough room for process with most rows */ 2906 if (!rank) { 2907 mmax = rowners[1]; 2908 for (i=2; i<=size; i++) { 2909 mmax = PetscMax(mmax, rowners[i]); 2910 } 2911 } else mmax = -1; /* unused, but compilers complain */ 2912 2913 rowners[0] = 0; 2914 for (i=2; i<=size; i++) { 2915 rowners[i] += rowners[i-1]; 2916 } 2917 rstart = rowners[rank]; 2918 rend = rowners[rank+1]; 2919 2920 /* distribute row lengths to all processors */ 2921 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2922 if (!rank) { 2923 ierr = 
PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2924 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2925 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2926 for (j=0; j<m; j++) { 2927 procsnz[0] += ourlens[j]; 2928 } 2929 for (i=1; i<size; i++) { 2930 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2931 /* calculate the number of nonzeros on each processor */ 2932 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2933 procsnz[i] += rowlengths[j]; 2934 } 2935 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2936 } 2937 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2938 } else { 2939 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2940 } 2941 2942 if (!rank) { 2943 /* determine max buffer needed and allocate it */ 2944 maxnz = 0; 2945 for (i=0; i<size; i++) { 2946 maxnz = PetscMax(maxnz,procsnz[i]); 2947 } 2948 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2949 2950 /* read in my part of the matrix column indices */ 2951 nz = procsnz[0]; 2952 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2953 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2954 2955 /* read in every one elses and ship off */ 2956 for (i=1; i<size; i++) { 2957 nz = procsnz[i]; 2958 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2959 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2960 } 2961 ierr = PetscFree(cols);CHKERRQ(ierr); 2962 } else { 2963 /* determine buffer space needed for message */ 2964 nz = 0; 2965 for (i=0; i<m; i++) { 2966 nz += ourlens[i]; 2967 } 2968 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2969 2970 /* receive message of column indices*/ 2971 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2972 } 2973 2974 /* determine column ownership if matrix is not square */ 2975 if (N != M) { 2976 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2977 else n = newMat->cmap->n; 2978 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2979 cstart = cend - n; 2980 } else { 2981 cstart = rstart; 2982 cend = rend; 2983 n = cend - cstart; 2984 } 2985 2986 /* loop over local rows, determining number of off diagonal entries */ 2987 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2988 jj = 0; 2989 for (i=0; i<m; i++) { 2990 for (j=0; j<ourlens[i]; j++) { 2991 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2992 jj++; 2993 } 2994 } 2995 2996 for (i=0; i<m; i++) { 2997 ourlens[i] -= offlens[i]; 2998 } 2999 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3000 3001 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3002 3003 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3004 3005 for (i=0; i<m; i++) { 3006 ourlens[i] += offlens[i]; 3007 } 3008 3009 if (!rank) { 3010 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3011 3012 /* read in my part of the matrix numerical values */ 3013 nz = procsnz[0]; 3014 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3015 3016 /* insert into matrix */ 3017 jj = rstart; 3018 smycols = mycols; 3019 svals = vals; 3020 for (i=0; i<m; i++) { 3021 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3022 smycols += ourlens[i]; 3023 svals += ourlens[i]; 3024 jj++; 3025 } 3026 3027 /* read in other processors and ship out */ 3028 for (i=1; i<size; i++) { 3029 nz = procsnz[i]; 3030 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3031 ierr = 
MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3032 } 3033 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3034 } else { 3035 /* receive numeric values */ 3036 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3037 3038 /* receive message of values*/ 3039 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3040 3041 /* insert into matrix */ 3042 jj = rstart; 3043 smycols = mycols; 3044 svals = vals; 3045 for (i=0; i<m; i++) { 3046 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3047 smycols += ourlens[i]; 3048 svals += ourlens[i]; 3049 jj++; 3050 } 3051 } 3052 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3053 ierr = PetscFree(vals);CHKERRQ(ierr); 3054 ierr = PetscFree(mycols);CHKERRQ(ierr); 3055 ierr = PetscFree(rowners);CHKERRQ(ierr); 3056 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3057 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3058 PetscFunctionReturn(0); 3059 } 3060 3061 #undef __FUNCT__ 3062 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3063 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */ 3064 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3065 { 3066 PetscErrorCode ierr; 3067 IS iscol_local; 3068 PetscInt csize; 3069 3070 PetscFunctionBegin; 3071 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3072 if (call == MAT_REUSE_MATRIX) { 3073 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3074 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3075 } else { 3076 /* check if we are grabbing all columns*/ 3077 PetscBool isstride; 3078 PetscMPIInt lisstride = 0,gisstride; 3079 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3080 if (isstride) { 3081 PetscInt start,len,mstart,mlen; 3082 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3083 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3084 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3085 if (mstart == start && mlen-mstart == len) lisstride = 1; 3086 } 3087 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3088 if (gisstride) { 3089 PetscInt N; 3090 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3091 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3092 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3093 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3094 } else { 3095 PetscInt cbs; 3096 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3097 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3098 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3099 } 3100 } 3101 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3102 if (call == MAT_INITIAL_MATRIX) { 3103 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3104 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3105 } 3106 PetscFunctionReturn(0); 3107 } 3108 3109 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3110 #undef __FUNCT__ 3111 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3112 /* 3113 Not great since it makes two copies of 
the submatrix, first an SeqAIJ 3114 in local and then by concatenating the local matrices the end result. 3115 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3116 3117 Note: This requires a sequential iscol with all indices. 3118 */ 3119 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3120 { 3121 PetscErrorCode ierr; 3122 PetscMPIInt rank,size; 3123 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3124 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3125 PetscBool allcolumns, colflag; 3126 Mat M,Mreuse; 3127 MatScalar *vwork,*aa; 3128 MPI_Comm comm; 3129 Mat_SeqAIJ *aij; 3130 3131 PetscFunctionBegin; 3132 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3133 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3134 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3135 3136 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3137 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3138 if (colflag && ncol == mat->cmap->N) { 3139 allcolumns = PETSC_TRUE; 3140 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3141 } else { 3142 allcolumns = PETSC_FALSE; 3143 } 3144 if (call == MAT_REUSE_MATRIX) { 3145 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3146 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3147 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3148 } else { 3149 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3150 } 3151 3152 /* 3153 m - number of local rows 3154 n - number of columns (same on all processors) 3155 rstart - first row in new global matrix generated 3156 */ 3157 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3158 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3159 if (call == MAT_INITIAL_MATRIX) { 3160 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3161 ii = aij->i; 3162 jj = aij->j; 3163 3164 /* 3165 Determine the number of non-zeros in the diagonal and off-diagonal 3166 portions of the matrix in order to do correct preallocation 3167 */ 3168 3169 /* first get start and end of "diagonal" columns */ 3170 if (csize == PETSC_DECIDE) { 3171 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3172 if (mglobal == n) { /* square matrix */ 3173 nlocal = m; 3174 } else { 3175 nlocal = n/size + ((n % size) > rank); 3176 } 3177 } else { 3178 nlocal = csize; 3179 } 3180 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3181 rstart = rend - nlocal; 3182 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3183 3184 /* next, compute all the lengths */ 3185 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3186 olens = dlens + m; 3187 for (i=0; i<m; i++) { 3188 jend = ii[i+1] - ii[i]; 3189 olen = 0; 3190 dlen = 0; 3191 for (j=0; j<jend; j++) { 3192 if (*jj < rstart || *jj >= rend) olen++; 3193 else dlen++; 3194 jj++; 3195 } 3196 olens[i] = olen; 3197 dlens[i] = dlen; 3198 } 3199 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3200 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3201 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3202 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3203 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 
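/* dlens/olens share a single allocation (olens = dlens + m) and were only needed for the preallocation above; the actual entries of M are copied from Mreuse below */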
3204 ierr = PetscFree(dlens);CHKERRQ(ierr);
3205 } else {
3206 PetscInt ml,nl;
3207
3208 M = *newmat;
3209 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3210 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3211 ierr = MatZeroEntries(M);CHKERRQ(ierr);
3212 /*
3213 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3214 rather than the slower MatSetValues().
3215 */
3216 M->was_assembled = PETSC_TRUE;
3217 M->assembled = PETSC_FALSE;
3218 }
3219 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3220 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3221 ii = aij->i;
3222 jj = aij->j;
3223 aa = aij->a;
3224 for (i=0; i<m; i++) {
3225 row = rstart + i;
3226 nz = ii[i+1] - ii[i];
3227 cwork = jj; jj += nz;
3228 vwork = aa; aa += nz;
3229 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3230 }
3231
3232 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3233 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3234 *newmat = M;
3235
3236 /* save submatrix used in processor for next request */
3237 if (call == MAT_INITIAL_MATRIX) {
3238 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3239 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3240 }
3241 PetscFunctionReturn(0);
3242 }
3243
3244 #undef __FUNCT__
3245 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3246 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3247 {
3248 PetscInt m,cstart, cend,j,nnz,i,d;
3249 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3250 const PetscInt *JJ;
3251 PetscScalar *values;
3252 PetscErrorCode ierr;
3253
3254 PetscFunctionBegin;
3255 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3256
3257 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3258 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3259 m = B->rmap->n;
3260 cstart = B->cmap->rstart;
3261 cend = B->cmap->rend;
3262 rstart = B->rmap->rstart;
3263
3264 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3265
3266 #if defined(PETSC_USE_DEBUG)
3267 for (i=0; i<m; i++) {
3268 nnz = Ii[i+1]- Ii[i];
3269 JJ = J + Ii[i];
3270 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3271 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3272 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3273 }
3274 #endif
3275
3276 for (i=0; i<m; i++) {
3277 nnz = Ii[i+1]- Ii[i];
3278 JJ = J + Ii[i];
3279 nnz_max = PetscMax(nnz_max,nnz);
3280 d = 0;
3281 for (j=0; j<nnz; j++) {
3282 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3283 }
3284 d_nnz[i] = d;
3285 o_nnz[i] = nnz - d;
3286 }
3287 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3288 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3289
3290 if (v) values = (PetscScalar*)v;
3291 else {
3292 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3293 }
3294
3295 for (i=0; i<m; i++) {
3296 ii = i + rstart;
3297 nnz = Ii[i+1]- Ii[i];
3298 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3299 } 3300 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3302 3303 if (!v) { 3304 ierr = PetscFree(values);CHKERRQ(ierr); 3305 } 3306 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3307 PetscFunctionReturn(0); 3308 } 3309 3310 #undef __FUNCT__ 3311 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3312 /*@ 3313 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3314 (the default parallel PETSc format). 3315 3316 Collective on MPI_Comm 3317 3318 Input Parameters: 3319 + B - the matrix 3320 . i - the indices into j for the start of each local row (starts with zero) 3321 . j - the column indices for each local row (starts with zero) 3322 - v - optional values in the matrix 3323 3324 Level: developer 3325 3326 Notes: 3327 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3328 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3329 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3330 3331 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3332 3333 The format which is used for the sparse matrix input, is equivalent to a 3334 row-major ordering.. i.e for the following matrix, the input data expected is 3335 as shown 3336 3337 $ 1 0 0 3338 $ 2 0 3 P0 3339 $ ------- 3340 $ 4 5 6 P1 3341 $ 3342 $ Process0 [P0]: rows_owned=[0,1] 3343 $ i = {0,1,3} [size = nrow+1 = 2+1] 3344 $ j = {0,0,2} [size = 3] 3345 $ v = {1,2,3} [size = 3] 3346 $ 3347 $ Process1 [P1]: rows_owned=[2] 3348 $ i = {0,3} [size = nrow+1 = 1+1] 3349 $ j = {0,1,2} [size = 3] 3350 $ v = {4,5,6} [size = 3] 3351 3352 .keywords: matrix, aij, compressed row, sparse, parallel 3353 3354 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3355 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3356 @*/ 3357 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3358 { 3359 PetscErrorCode ierr; 3360 3361 PetscFunctionBegin; 3362 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3363 PetscFunctionReturn(0); 3364 } 3365 3366 #undef __FUNCT__ 3367 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3368 /*@C 3369 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3370 (the default parallel PETSc format). For good matrix assembly performance 3371 the user should preallocate the matrix storage by setting the parameters 3372 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3373 performance can be increased by more than a factor of 50. 3374 3375 Collective on MPI_Comm 3376 3377 Input Parameters: 3378 + B - the matrix 3379 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3380 (same value is used for all local rows) 3381 . d_nnz - array containing the number of nonzeros in the various rows of the 3382 DIAGONAL portion of the local submatrix (possibly different for each row) 3383 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3384 The size of this array is equal to the number of local rows, i.e 'm'. 
3385 For matrices that will be factored, you must leave room for (and set) 3386 the diagonal entry even if it is zero. 3387 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3388 submatrix (same value is used for all local rows). 3389 - o_nnz - array containing the number of nonzeros in the various rows of the 3390 OFF-DIAGONAL portion of the local submatrix (possibly different for 3391 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3392 structure. The size of this array is equal to the number 3393 of local rows, i.e 'm'. 3394 3395 If the *_nnz parameter is given then the *_nz parameter is ignored 3396 3397 The AIJ format (also called the Yale sparse matrix format or 3398 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3399 storage. The stored row and column indices begin with zero. 3400 See Users-Manual: ch_mat for details. 3401 3402 The parallel matrix is partitioned such that the first m0 rows belong to 3403 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3404 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3405 3406 The DIAGONAL portion of the local submatrix of a processor can be defined 3407 as the submatrix which is obtained by extraction the part corresponding to 3408 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3409 first row that belongs to the processor, r2 is the last row belonging to 3410 the this processor, and c1-c2 is range of indices of the local part of a 3411 vector suitable for applying the matrix to. This is an mxn matrix. In the 3412 common case of a square matrix, the row and column ranges are the same and 3413 the DIAGONAL part is also square. The remaining portion of the local 3414 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3415 3416 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3417 3418 You can call MatGetInfo() to get information on how effective the preallocation was; 3419 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3420 You can also run with the option -info and look for messages with the string 3421 malloc in them to see if additional memory allocation was needed. 3422 3423 Example usage: 3424 3425 Consider the following 8x8 matrix with 34 non-zero values, that is 3426 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3427 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3428 as follows: 3429 3430 .vb 3431 1 2 0 | 0 3 0 | 0 4 3432 Proc0 0 5 6 | 7 0 0 | 8 0 3433 9 0 10 | 11 0 0 | 12 0 3434 ------------------------------------- 3435 13 0 14 | 15 16 17 | 0 0 3436 Proc1 0 18 0 | 19 20 21 | 0 0 3437 0 0 0 | 22 23 0 | 24 0 3438 ------------------------------------- 3439 Proc2 25 26 27 | 0 0 28 | 29 0 3440 30 0 0 | 31 32 33 | 0 34 3441 .ve 3442 3443 This can be represented as a collection of submatrices as: 3444 3445 .vb 3446 A B C 3447 D E F 3448 G H I 3449 .ve 3450 3451 Where the submatrices A,B,C are owned by proc0, D,E,F are 3452 owned by proc1, G,H,I are owned by proc2. 3453 3454 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3455 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3456 The 'M','N' parameters are 8,8, and have the same values on all procs. 3457 3458 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3459 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3460 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3461 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3462 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3463 matrix, and [DF] as another SeqAIJ matrix.
3464
3465 When the d_nz, o_nz parameters are specified, d_nz storage elements are
3466 allocated for every row of the local DIAGONAL submatrix, and o_nz
3467 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3468 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3469 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3470 In this case, the values of d_nz,o_nz are:
3471 .vb
3472 proc0 : d_nz = 2, o_nz = 2
3473 proc1 : d_nz = 3, o_nz = 2
3474 proc2 : d_nz = 1, o_nz = 4
3475 .ve
3476 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3477 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3478 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3479 34 values.
3480
3481 When the d_nnz, o_nnz parameters are specified, the storage is specified
3482 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3483 In the above case the values for d_nnz,o_nnz are:
3484 .vb
3485 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3486 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3487 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3488 .ve
3489 Here the space allocated is the sum of all of the above values, i.e. 34, and
3490 hence the preallocation is perfect.
3491
3492 Level: intermediate
3493
3494 .keywords: matrix, aij, compressed row, sparse, parallel
3495
3496 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3497 MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3498 @*/
3499 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3500 {
3501 PetscErrorCode ierr;
3502
3503 PetscFunctionBegin;
3504 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3505 PetscValidType(B,1);
3506 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3507 PetscFunctionReturn(0);
3508 }
3509
3510 #undef __FUNCT__
3511 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3512 /*@
3513 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3514 in standard CSR format.
3515
3516 Collective on MPI_Comm
3517
3518 Input Parameters:
3519 + comm - MPI communicator
3520 . m - number of local rows (cannot be PETSC_DECIDE)
3521 . n - This value should be the same as the local size used in creating the
3522 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3523 calculated if N is given) For square matrices n is almost always m.
3524 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3525 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3526 . i - row indices
3527 . j - column indices
3528 - a - matrix values
3529
3530 Output Parameter:
3531 . mat - the matrix
3532
3533 Level: intermediate
3534
3535 Notes:
3536 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3537 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3538 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3539
3540 The i and j indices are 0 based, and the i indices are offsets into the local j array.
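
For instance, in the small two-process example shown below, process 0 (which owns rows 0 and 1) could create the matrix with a call like the following, where comm and mat stand for the caller's communicator and Mat handle:
.vb
PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
PetscScalar v[] = {1,2,3};
MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&mat);
.ve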
3541 3542 The format which is used for the sparse matrix input, is equivalent to a 3543 row-major ordering.. i.e for the following matrix, the input data expected is 3544 as shown 3545 3546 $ 1 0 0 3547 $ 2 0 3 P0 3548 $ ------- 3549 $ 4 5 6 P1 3550 $ 3551 $ Process0 [P0]: rows_owned=[0,1] 3552 $ i = {0,1,3} [size = nrow+1 = 2+1] 3553 $ j = {0,0,2} [size = 3] 3554 $ v = {1,2,3} [size = 3] 3555 $ 3556 $ Process1 [P1]: rows_owned=[2] 3557 $ i = {0,3} [size = nrow+1 = 1+1] 3558 $ j = {0,1,2} [size = 3] 3559 $ v = {4,5,6} [size = 3] 3560 3561 .keywords: matrix, aij, compressed row, sparse, parallel 3562 3563 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3564 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3565 @*/ 3566 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3567 { 3568 PetscErrorCode ierr; 3569 3570 PetscFunctionBegin; 3571 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3572 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3573 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3574 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3575 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3576 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3577 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3578 PetscFunctionReturn(0); 3579 } 3580 3581 #undef __FUNCT__ 3582 #define __FUNCT__ "MatCreateAIJ" 3583 /*@C 3584 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3585 (the default parallel PETSc format). For good matrix assembly performance 3586 the user should preallocate the matrix storage by setting the parameters 3587 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3588 performance can be increased by more than a factor of 50. 3589 3590 Collective on MPI_Comm 3591 3592 Input Parameters: 3593 + comm - MPI communicator 3594 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3595 This value should be the same as the local size used in creating the 3596 y vector for the matrix-vector product y = Ax. 3597 . n - This value should be the same as the local size used in creating the 3598 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3599 calculated if N is given) For square matrices n is almost always m. 3600 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3601 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3602 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3603 (same value is used for all local rows) 3604 . d_nnz - array containing the number of nonzeros in the various rows of the 3605 DIAGONAL portion of the local submatrix (possibly different for each row) 3606 or NULL, if d_nz is used to specify the nonzero structure. 3607 The size of this array is equal to the number of local rows, i.e 'm'. 3608 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3609 submatrix (same value is used for all local rows). 3610 - o_nnz - array containing the number of nonzeros in the various rows of the 3611 OFF-DIAGONAL portion of the local submatrix (possibly different for 3612 each row) or NULL, if o_nz is used to specify the nonzero 3613 structure. 
The size of this array is equal to the number
3614 of local rows, i.e. 'm'.
3615
3616 Output Parameter:
3617 . A - the matrix
3618
3619 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3620 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3621 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3622
3623 Notes:
3624 If the *_nnz parameter is given then the *_nz parameter is ignored.
3625
3626 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
3627 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3628 storage requirements for this matrix.
3629
3630 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3631 processor then it must be used on all processors that share the object for
3632 that argument.
3633
3634 The user MUST specify either the local or global matrix dimensions
3635 (possibly both).
3636
3637 The parallel matrix is partitioned across processors such that the
3638 first m0 rows belong to process 0, the next m1 rows belong to
3639 process 1, the next m2 rows belong to process 2, etc., where
3640 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3641 values corresponding to an [m x N] submatrix.
3642
3643 The columns are logically partitioned with the n0 columns belonging
3644 to the 0th partition, the next n1 columns belonging to the next
3645 partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3646
3647 The DIAGONAL portion of the local submatrix on any given processor
3648 is the submatrix corresponding to the rows and columns m,n
3649 owned by the given processor, i.e. the diagonal matrix on
3650 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3651 etc. The remaining portion of the local submatrix [m x (N-n)]
3652 constitutes the OFF-DIAGONAL portion. The example below better
3653 illustrates this concept.
3654
3655 For a square global matrix we define each processor's diagonal portion
3656 to be its local rows and the corresponding columns (a square submatrix);
3657 each processor's off-diagonal portion encompasses the remainder of the
3658 local matrix (a rectangular submatrix).
3659
3660 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3661
3662 When calling this routine with a single process communicator, a matrix of
3663 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3664 type of communicator, use the construction mechanism:
3665 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3666
3667 By default, this format uses inodes (identical nodes) when possible.
3668 We search for consecutive rows with the same nonzero structure, thereby
3669 reusing matrix information to achieve increased efficiency.
3670
3671 Options Database Keys:
3672 + -mat_no_inode - Do not use inodes
3673 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3674 - -mat_aij_oneindex - Internally use indexing starting at 1
3675 rather than 0. Note that when calling MatSetValues(),
3676 the user still MUST index entries starting at 0!
3677
3678
3679 Example usage:
3680
3681 Consider the following 8x8 matrix with 34 non-zero values that is
3682 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3683 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3684 as follows:
3685
3686 .vb
3687 1 2 0 | 0 3 0 | 0 4
3688 Proc0 0 5 6 | 7 0 0 | 8 0
3689 9 0 10 | 11 0 0 | 12 0
3690 -------------------------------------
3691 13 0 14 | 15 16 17 | 0 0
3692 Proc1 0 18 0 | 19 20 21 | 0 0
3693 0 0 0 | 22 23 0 | 24 0
3694 -------------------------------------
3695 Proc2 25 26 27 | 0 0 28 | 29 0
3696 30 0 0 | 31 32 33 | 0 34
3697 .ve
3698
3699 This can be represented as a collection of submatrices as:
3700
3701 .vb
3702 A B C
3703 D E F
3704 G H I
3705 .ve
3706
3707 Where the submatrices A,B,C are owned by proc0, D,E,F are
3708 owned by proc1, G,H,I are owned by proc2.
3709
3710 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3711 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3712 The 'M','N' parameters are 8,8, and have the same values on all procs.
3713
3714 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3715 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3716 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3717 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3718 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3719 matrix, and [DF] as another SeqAIJ matrix.
3720
3721 When the d_nz, o_nz parameters are specified, d_nz storage elements are
3722 allocated for every row of the local diagonal submatrix, and o_nz
3723 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3724 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3725 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3726 In this case, the values of d_nz,o_nz are:
3727 .vb
3728 proc0 : d_nz = 2, o_nz = 2
3729 proc1 : d_nz = 3, o_nz = 2
3730 proc2 : d_nz = 1, o_nz = 4
3731 .ve
3732 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3733 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3734 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3735 34 values.
3736
3737 When the d_nnz, o_nnz parameters are specified, the storage is specified
3738 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3739 In the above case the values for d_nnz,o_nnz are:
3740 .vb
3741 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3742 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3743 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3744 .ve
3745 Here the space allocated is the sum of all of the above values, i.e. 34, and
3746 hence the preallocation is perfect.
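
One possible call for proc0 in the example above (here comm denotes the communicator shared by the three processes and A a Mat handle; the other processes pass their own local sizes and nnz arrays from the tables above) is:
.vb
PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve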
3747 3748 Level: intermediate 3749 3750 .keywords: matrix, aij, compressed row, sparse, parallel 3751 3752 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3753 MPIAIJ, MatCreateMPIAIJWithArrays() 3754 @*/ 3755 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3756 { 3757 PetscErrorCode ierr; 3758 PetscMPIInt size; 3759 3760 PetscFunctionBegin; 3761 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3762 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3763 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3764 if (size > 1) { 3765 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3766 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3767 } else { 3768 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3769 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3770 } 3771 PetscFunctionReturn(0); 3772 } 3773 3774 #undef __FUNCT__ 3775 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3776 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3777 { 3778 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3779 PetscBool flg; 3780 PetscErrorCode ierr; 3781 3782 PetscFunctionBegin; 3783 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3784 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input"); 3785 if (Ad) *Ad = a->A; 3786 if (Ao) *Ao = a->B; 3787 if (colmap) *colmap = a->garray; 3788 PetscFunctionReturn(0); 3789 } 3790 3791 #undef __FUNCT__ 3792 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3793 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3794 { 3795 PetscErrorCode ierr; 3796 PetscInt i; 3797 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3798 3799 PetscFunctionBegin; 3800 if (coloring->ctype == IS_COLORING_GLOBAL) { 3801 ISColoringValue *allcolors,*colors; 3802 ISColoring ocoloring; 3803 3804 /* set coloring for diagonal portion */ 3805 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3806 3807 /* set coloring for off-diagonal portion */ 3808 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3809 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3810 for (i=0; i<a->B->cmap->n; i++) { 3811 colors[i] = allcolors[a->garray[i]]; 3812 } 3813 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3814 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3815 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3816 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3817 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3818 ISColoringValue *colors; 3819 PetscInt *larray; 3820 ISColoring ocoloring; 3821 3822 /* set coloring for diagonal portion */ 3823 ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr); 3824 for (i=0; i<a->A->cmap->n; i++) { 3825 larray[i] = i + A->cmap->rstart; 3826 } 3827 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3828 ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr); 3829 for (i=0; i<a->A->cmap->n; i++) { 3830 colors[i] = coloring->colors[larray[i]]; 3831 } 3832 ierr = PetscFree(larray);CHKERRQ(ierr); 3833 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3834 ierr = 
MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3835 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3836 3837 /* set coloring for off-diagonal portion */ 3838 ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr); 3839 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3840 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3841 for (i=0; i<a->B->cmap->n; i++) { 3842 colors[i] = coloring->colors[larray[i]]; 3843 } 3844 ierr = PetscFree(larray);CHKERRQ(ierr); 3845 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3846 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3847 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3848 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3849 PetscFunctionReturn(0); 3850 } 3851 3852 #undef __FUNCT__ 3853 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3854 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3855 { 3856 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3857 PetscErrorCode ierr; 3858 3859 PetscFunctionBegin; 3860 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3861 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3862 PetscFunctionReturn(0); 3863 } 3864 3865 #undef __FUNCT__ 3866 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3867 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3868 { 3869 PetscErrorCode ierr; 3870 PetscInt m,N,i,rstart,nnz,Ii; 3871 PetscInt *indx; 3872 PetscScalar *values; 3873 3874 PetscFunctionBegin; 3875 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3876 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3877 PetscInt *dnz,*onz,sum,bs,cbs; 3878 3879 if (n == PETSC_DECIDE) { 3880 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3881 } 3882 /* Check sum(n) = N */ 3883 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3884 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3885 3886 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3887 rstart -= m; 3888 3889 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3890 for (i=0; i<m; i++) { 3891 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3892 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3893 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3894 } 3895 3896 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3897 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3898 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3899 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3900 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3901 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3902 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3903 } 3904 3905 /* numeric phase */ 3906 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3907 for (i=0; i<m; i++) { 3908 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3909 Ii = i + rstart; 3910 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3911 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3912 } 3913 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3914 ierr = 
MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3915 PetscFunctionReturn(0); 3916 } 3917 3918 #undef __FUNCT__ 3919 #define __FUNCT__ "MatFileSplit" 3920 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3921 { 3922 PetscErrorCode ierr; 3923 PetscMPIInt rank; 3924 PetscInt m,N,i,rstart,nnz; 3925 size_t len; 3926 const PetscInt *indx; 3927 PetscViewer out; 3928 char *name; 3929 Mat B; 3930 const PetscScalar *values; 3931 3932 PetscFunctionBegin; 3933 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3934 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3935 /* Should this be the type of the diagonal block of A? */ 3936 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3937 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3938 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3939 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3940 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3941 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3942 for (i=0; i<m; i++) { 3943 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3944 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3945 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3946 } 3947 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3948 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3949 3950 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3951 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3952 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3953 sprintf(name,"%s.%d",outfile,rank); 3954 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3955 ierr = PetscFree(name);CHKERRQ(ierr); 3956 ierr = MatView(B,out);CHKERRQ(ierr); 3957 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3958 ierr = MatDestroy(&B);CHKERRQ(ierr); 3959 PetscFunctionReturn(0); 3960 } 3961 3962 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3963 #undef __FUNCT__ 3964 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3965 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3966 { 3967 PetscErrorCode ierr; 3968 Mat_Merge_SeqsToMPI *merge; 3969 PetscContainer container; 3970 3971 PetscFunctionBegin; 3972 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3973 if (container) { 3974 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3975 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3976 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3977 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3978 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3979 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3980 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3981 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3982 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3983 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3984 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3985 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3986 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3987 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3988 ierr = PetscFree(merge);CHKERRQ(ierr); 3989 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3990 } 3991 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3992 PetscFunctionReturn(0); 3993 } 3994 3995 #include <../src/mat/utils/freespace.h> 3996 #include <petscbt.h> 3997 3998 #undef __FUNCT__ 3999 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4000 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4001 { 4002 
PetscErrorCode ierr; 4003 MPI_Comm comm; 4004 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4005 PetscMPIInt size,rank,taga,*len_s; 4006 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4007 PetscInt proc,m; 4008 PetscInt **buf_ri,**buf_rj; 4009 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4010 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4011 MPI_Request *s_waits,*r_waits; 4012 MPI_Status *status; 4013 MatScalar *aa=a->a; 4014 MatScalar **abuf_r,*ba_i; 4015 Mat_Merge_SeqsToMPI *merge; 4016 PetscContainer container; 4017 4018 PetscFunctionBegin; 4019 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4020 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4021 4022 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4023 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4024 4025 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4026 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4027 4028 bi = merge->bi; 4029 bj = merge->bj; 4030 buf_ri = merge->buf_ri; 4031 buf_rj = merge->buf_rj; 4032 4033 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4034 owners = merge->rowmap->range; 4035 len_s = merge->len_s; 4036 4037 /* send and recv matrix values */ 4038 /*-----------------------------*/ 4039 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4040 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4041 4042 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4043 for (proc=0,k=0; proc<size; proc++) { 4044 if (!len_s[proc]) continue; 4045 i = owners[proc]; 4046 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4047 k++; 4048 } 4049 4050 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4051 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4052 ierr = PetscFree(status);CHKERRQ(ierr); 4053 4054 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4055 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4056 4057 /* insert mat values of mpimat */ 4058 /*----------------------------*/ 4059 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4060 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4061 4062 for (k=0; k<merge->nrecv; k++) { 4063 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4064 nrows = *(buf_ri_k[k]); 4065 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4066 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4067 } 4068 4069 /* set values of ba */ 4070 m = merge->rowmap->n; 4071 for (i=0; i<m; i++) { 4072 arow = owners[rank] + i; 4073 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4074 bnzi = bi[i+1] - bi[i]; 4075 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4076 4077 /* add local non-zero vals of this proc's seqmat into ba */ 4078 anzi = ai[arow+1] - ai[arow]; 4079 aj = a->j + ai[arow]; 4080 aa = a->a + ai[arow]; 4081 nextaj = 0; 4082 for (j=0; nextaj<anzi; j++) { 4083 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4084 ba_i[j] += aa[nextaj++]; 4085 } 4086 } 4087 4088 /* add received vals into ba */ 4089 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4090 /* i-th row */ 4091 if (i == *nextrow[k]) { 4092 anzi = *(nextai[k]+1) - *nextai[k]; 4093 aj = buf_rj[k] + *(nextai[k]); 4094 aa = abuf_r[k] + 
*(nextai[k]); 4095 nextaj = 0; 4096 for (j=0; nextaj<anzi; j++) { 4097 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4098 ba_i[j] += aa[nextaj++]; 4099 } 4100 } 4101 nextrow[k]++; nextai[k]++; 4102 } 4103 } 4104 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4105 } 4106 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4107 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4108 4109 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4110 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4111 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4112 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4113 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4114 PetscFunctionReturn(0); 4115 } 4116 4117 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4118 4119 #undef __FUNCT__ 4120 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4121 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4122 { 4123 PetscErrorCode ierr; 4124 Mat B_mpi; 4125 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4126 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4127 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4128 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4129 PetscInt len,proc,*dnz,*onz,bs,cbs; 4130 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4131 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4132 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4133 MPI_Status *status; 4134 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4135 PetscBT lnkbt; 4136 Mat_Merge_SeqsToMPI *merge; 4137 PetscContainer container; 4138 4139 PetscFunctionBegin; 4140 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4141 4142 /* make sure it is a PETSc comm */ 4143 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4144 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4145 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4146 4147 ierr = PetscNew(&merge);CHKERRQ(ierr); 4148 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4149 4150 /* determine row ownership */ 4151 /*---------------------------------------------------------*/ 4152 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4153 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4154 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4155 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4156 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4157 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4158 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4159 4160 m = merge->rowmap->n; 4161 owners = merge->rowmap->range; 4162 4163 /* determine the number of messages to send, their lengths */ 4164 /*---------------------------------------------------------*/ 4165 len_s = merge->len_s; 4166 4167 len = 0; /* length of buf_si[] */ 4168 merge->nsend = 0; 4169 for (proc=0; proc<size; proc++) { 4170 len_si[proc] = 0; 4171 if (proc == rank) { 4172 len_s[proc] = 0; 4173 } else { 4174 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4175 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4176 } 4177 if (len_s[proc]) { 4178 merge->nsend++; 4179 nrows = 0; 4180 for (i=owners[proc]; i<owners[proc+1]; i++) { 4181 if (ai[i+1] > ai[i]) nrows++; 4182 } 4183 len_si[proc] = 2*(nrows+1); 4184 len += len_si[proc]; 4185 } 4186 } 4187 4188 /* determine the number and length of messages to receive for 
ij-structure */ 4189 /*-------------------------------------------------------------------------*/ 4190 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4191 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4192 4193 /* post the Irecv of j-structure */ 4194 /*-------------------------------*/ 4195 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4196 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4197 4198 /* post the Isend of j-structure */ 4199 /*--------------------------------*/ 4200 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4201 4202 for (proc=0, k=0; proc<size; proc++) { 4203 if (!len_s[proc]) continue; 4204 i = owners[proc]; 4205 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4206 k++; 4207 } 4208 4209 /* receives and sends of j-structure are complete */ 4210 /*------------------------------------------------*/ 4211 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4212 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4213 4214 /* send and recv i-structure */ 4215 /*---------------------------*/ 4216 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4217 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4218 4219 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4220 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4221 for (proc=0,k=0; proc<size; proc++) { 4222 if (!len_s[proc]) continue; 4223 /* form outgoing message for i-structure: 4224 buf_si[0]: nrows to be sent 4225 [1:nrows]: row index (global) 4226 [nrows+1:2*nrows+1]: i-structure index 4227 */ 4228 /*-------------------------------------------*/ 4229 nrows = len_si[proc]/2 - 1; 4230 buf_si_i = buf_si + nrows+1; 4231 buf_si[0] = nrows; 4232 buf_si_i[0] = 0; 4233 nrows = 0; 4234 for (i=owners[proc]; i<owners[proc+1]; i++) { 4235 anzi = ai[i+1] - ai[i]; 4236 if (anzi) { 4237 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4238 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4239 nrows++; 4240 } 4241 } 4242 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4243 k++; 4244 buf_si += len_si[proc]; 4245 } 4246 4247 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4248 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4249 4250 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4251 for (i=0; i<merge->nrecv; i++) { 4252 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4253 } 4254 4255 ierr = PetscFree(len_si);CHKERRQ(ierr); 4256 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4257 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4258 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4259 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4260 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4261 ierr = PetscFree(status);CHKERRQ(ierr); 4262 4263 /* compute a local seq matrix in each processor */ 4264 /*----------------------------------------------*/ 4265 /* allocate bi array and free space for accumulating nonzero column info */ 4266 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4267 bi[0] = 0; 4268 4269 /* create and initialize a 
linked list */ 4270 nlnk = N+1; 4271 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4272 4273 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4274 len = ai[owners[rank+1]] - ai[owners[rank]]; 4275 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4276 4277 current_space = free_space; 4278 4279 /* determine symbolic info for each local row */ 4280 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4281 4282 for (k=0; k<merge->nrecv; k++) { 4283 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4284 nrows = *buf_ri_k[k]; 4285 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4286 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4287 } 4288 4289 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4290 len = 0; 4291 for (i=0; i<m; i++) { 4292 bnzi = 0; 4293 /* add local non-zero cols of this proc's seqmat into lnk */ 4294 arow = owners[rank] + i; 4295 anzi = ai[arow+1] - ai[arow]; 4296 aj = a->j + ai[arow]; 4297 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4298 bnzi += nlnk; 4299 /* add received col data into lnk */ 4300 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4301 if (i == *nextrow[k]) { /* i-th row */ 4302 anzi = *(nextai[k]+1) - *nextai[k]; 4303 aj = buf_rj[k] + *nextai[k]; 4304 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4305 bnzi += nlnk; 4306 nextrow[k]++; nextai[k]++; 4307 } 4308 } 4309 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4310 4311 /* if free space is not available, make more free space */ 4312 if (current_space->local_remaining<bnzi) { 4313 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4314 nspacedouble++; 4315 } 4316 /* copy data into free space, then initialize lnk */ 4317 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4318 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4319 4320 current_space->array += bnzi; 4321 current_space->local_used += bnzi; 4322 current_space->local_remaining -= bnzi; 4323 4324 bi[i+1] = bi[i] + bnzi; 4325 } 4326 4327 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4328 4329 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4330 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4331 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4332 4333 /* create symbolic parallel matrix B_mpi */ 4334 /*---------------------------------------*/ 4335 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4336 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4337 if (n==PETSC_DECIDE) { 4338 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4339 } else { 4340 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4341 } 4342 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4343 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4344 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4345 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4346 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4347 4348 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4349 B_mpi->assembled = PETSC_FALSE; 4350 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4351 merge->bi = bi; 4352 merge->bj = bj; 4353 merge->buf_ri = 
buf_ri; 4354 merge->buf_rj = buf_rj; 4355 merge->coi = NULL; 4356 merge->coj = NULL; 4357 merge->owners_co = NULL; 4358 4359 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4360 4361 /* attach the supporting struct to B_mpi for reuse */ 4362 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4363 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4364 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4365 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4366 *mpimat = B_mpi; 4367 4368 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4369 PetscFunctionReturn(0); 4370 } 4371 4372 #undef __FUNCT__ 4373 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4374 /*@C 4375 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4376 matrices from each processor 4377 4378 Collective on MPI_Comm 4379 4380 Input Parameters: 4381 + comm - the communicators the parallel matrix will live on 4382 . seqmat - the input sequential matrices 4383 . m - number of local rows (or PETSC_DECIDE) 4384 . n - number of local columns (or PETSC_DECIDE) 4385 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4386 4387 Output Parameter: 4388 . mpimat - the parallel matrix generated 4389 4390 Level: advanced 4391 4392 Notes: 4393 The dimensions of the sequential matrix in each processor MUST be the same. 4394 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4395 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4396 @*/ 4397 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4398 { 4399 PetscErrorCode ierr; 4400 PetscMPIInt size; 4401 4402 PetscFunctionBegin; 4403 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4404 if (size == 1) { 4405 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4406 if (scall == MAT_INITIAL_MATRIX) { 4407 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4408 } else { 4409 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4410 } 4411 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4412 PetscFunctionReturn(0); 4413 } 4414 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4415 if (scall == MAT_INITIAL_MATRIX) { 4416 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4417 } 4418 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4419 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4420 PetscFunctionReturn(0); 4421 } 4422 4423 #undef __FUNCT__ 4424 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4425 /*@ 4426 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4427 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4428 with MatGetSize() 4429 4430 Not Collective 4431 4432 Input Parameters: 4433 + A - the matrix 4434 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4435 4436 Output Parameter: 4437 . 
A_loc - the local sequential matrix generated 4438 4439 Level: developer 4440 4441 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4442 4443 @*/ 4444 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4445 { 4446 PetscErrorCode ierr; 4447 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4448 Mat_SeqAIJ *mat,*a,*b; 4449 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4450 MatScalar *aa,*ba,*cam; 4451 PetscScalar *ca; 4452 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4453 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4454 PetscBool match; 4455 MPI_Comm comm; 4456 PetscMPIInt size; 4457 4458 PetscFunctionBegin; 4459 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4460 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4461 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4462 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4463 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4464 4465 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4466 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4467 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4468 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4469 aa = a->a; ba = b->a; 4470 if (scall == MAT_INITIAL_MATRIX) { 4471 if (size == 1) { 4472 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4473 PetscFunctionReturn(0); 4474 } 4475 4476 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4477 ci[0] = 0; 4478 for (i=0; i<am; i++) { 4479 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4480 } 4481 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4482 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4483 k = 0; 4484 for (i=0; i<am; i++) { 4485 ncols_o = bi[i+1] - bi[i]; 4486 ncols_d = ai[i+1] - ai[i]; 4487 /* off-diagonal portion of A */ 4488 for (jo=0; jo<ncols_o; jo++) { 4489 col = cmap[*bj]; 4490 if (col >= cstart) break; 4491 cj[k] = col; bj++; 4492 ca[k++] = *ba++; 4493 } 4494 /* diagonal portion of A */ 4495 for (j=0; j<ncols_d; j++) { 4496 cj[k] = cstart + *aj++; 4497 ca[k++] = *aa++; 4498 } 4499 /* off-diagonal portion of A */ 4500 for (j=jo; j<ncols_o; j++) { 4501 cj[k] = cmap[*bj++]; 4502 ca[k++] = *ba++; 4503 } 4504 } 4505 /* put together the new matrix */ 4506 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4507 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4508 /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 4509 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4510 mat->free_a = PETSC_TRUE; 4511 mat->free_ij = PETSC_TRUE; 4512 mat->nonew = 0; 4513 } else if (scall == MAT_REUSE_MATRIX) { 4514 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4515 ci = mat->i; cj = mat->j; cam = mat->a; 4516 for (i=0; i<am; i++) { 4517 /* off-diagonal portion of A */ 4518 ncols_o = bi[i+1] - bi[i]; 4519 for (jo=0; jo<ncols_o; jo++) { 4520 col = cmap[*bj]; 4521 if (col >= cstart) break; 4522 *cam++ = *ba++; bj++; 4523 } 4524 /* diagonal portion of A */ 4525 ncols_d = ai[i+1] - ai[i]; 4526 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4527 /* off-diagonal portion of A */ 4528 for (j=jo; j<ncols_o; j++) { 4529 *cam++ = *ba++; bj++; 4530 } 4531 } 4532 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4533 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4534 PetscFunctionReturn(0); 4535 } 4536 4537 #undef __FUNCT__ 4538 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4539 /*@C 4540 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4541 4542 Not Collective 4543 4544 Input Parameters: 4545 + A - the matrix 4546 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4547 - row, col - index sets of rows and columns to extract (or NULL) 4548 4549 Output Parameter: 4550 . A_loc - the local sequential matrix generated 4551 4552 Level: developer 4553 4554 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4555 4556 @*/ 4557 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4558 { 4559 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4560 PetscErrorCode ierr; 4561 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4562 IS isrowa,iscola; 4563 Mat *aloc; 4564 PetscBool match; 4565 4566 PetscFunctionBegin; 4567 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4568 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4569 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4570 if (!row) { 4571 start = A->rmap->rstart; end = A->rmap->rend; 4572 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4573 } else { 4574 isrowa = *row; 4575 } 4576 if (!col) { 4577 start = A->cmap->rstart; 4578 cmap = a->garray; 4579 nzA = a->A->cmap->n; 4580 nzB = a->B->cmap->n; 4581 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4582 ncols = 0; 4583 for (i=0; i<nzB; i++) { 4584 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4585 else break; 4586 } 4587 imark = i; 4588 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4589 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4590 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4591 } else { 4592 iscola = *col; 4593 } 4594 if (scall != MAT_INITIAL_MATRIX) { 4595 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4596 aloc[0] = *A_loc; 4597 } 4598 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4599 *A_loc = aloc[0]; 4600 ierr = PetscFree(aloc);CHKERRQ(ierr); 4601 if (!row) { 4602 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4603 } 4604 if (!col) { 4605 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4606 } 4607 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4608 PetscFunctionReturn(0); 4609 } 4610 4611 #undef __FUNCT__ 4612 #define __FUNCT__ "MatGetBrowsOfAcols" 4613 /*@C 4614 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero 
columns of local A 4615 4616 Collective on Mat 4617 4618 Input Parameters: 4619 + A,B - the matrices in mpiaij format 4620 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4621 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4622 4623 Output Parameter: 4624 + rowb, colb - index sets of rows and columns of B to extract 4625 - B_seq - the sequential matrix generated 4626 4627 Level: developer 4628 4629 @*/ 4630 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4631 { 4632 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4633 PetscErrorCode ierr; 4634 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4635 IS isrowb,iscolb; 4636 Mat *bseq=NULL; 4637 4638 PetscFunctionBegin; 4639 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4640 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4641 } 4642 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4643 4644 if (scall == MAT_INITIAL_MATRIX) { 4645 start = A->cmap->rstart; 4646 cmap = a->garray; 4647 nzA = a->A->cmap->n; 4648 nzB = a->B->cmap->n; 4649 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4650 ncols = 0; 4651 for (i=0; i<nzB; i++) { /* row < local row index */ 4652 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4653 else break; 4654 } 4655 imark = i; 4656 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4657 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4658 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4659 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4660 } else { 4661 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4662 isrowb = *rowb; iscolb = *colb; 4663 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4664 bseq[0] = *B_seq; 4665 } 4666 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4667 *B_seq = bseq[0]; 4668 ierr = PetscFree(bseq);CHKERRQ(ierr); 4669 if (!rowb) { 4670 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4671 } else { 4672 *rowb = isrowb; 4673 } 4674 if (!colb) { 4675 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4676 } else { 4677 *colb = iscolb; 4678 } 4679 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4680 PetscFunctionReturn(0); 4681 } 4682 4683 #undef __FUNCT__ 4684 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4685 /* 4686 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4687 of the OFF-DIAGONAL portion of local A 4688 4689 Collective on Mat 4690 4691 Input Parameters: 4692 + A,B - the matrices in mpiaij format 4693 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4694 4695 Output Parameter: 4696 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4697 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4698 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4699 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4700 4701 Level: developer 4702 4703 */ 4704 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4705 { 4706 VecScatter_MPI_General *gen_to,*gen_from; 4707 PetscErrorCode ierr; 4708 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4709 Mat_SeqAIJ *b_oth; 4710 VecScatter ctx =a->Mvctx; 4711 MPI_Comm comm; 4712 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4713 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4714 PetscScalar *rvalues,*svalues; 4715 MatScalar *b_otha,*bufa,*bufA; 4716 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4717 MPI_Request *rwaits = NULL,*swaits = NULL; 4718 MPI_Status *sstatus,rstatus; 4719 PetscMPIInt jj,size; 4720 PetscInt *cols,sbs,rbs; 4721 PetscScalar *vals; 4722 4723 PetscFunctionBegin; 4724 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4725 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4726 4727 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4728 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4729 } 4730 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4731 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4732 4733 gen_to = (VecScatter_MPI_General*)ctx->todata; 4734 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4735 rvalues = gen_from->values; /* holds the length of receiving row */ 4736 svalues = gen_to->values; /* holds the length of sending row */ 4737 nrecvs = gen_from->n; 4738 nsends = gen_to->n; 4739 4740 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4741 srow = gen_to->indices; /* local row index to be sent */ 4742 sstarts = gen_to->starts; 4743 sprocs = gen_to->procs; 4744 sstatus = gen_to->sstatus; 4745 sbs = gen_to->bs; 4746 rstarts = gen_from->starts; 4747 rprocs = gen_from->procs; 4748 rbs = gen_from->bs; 4749 4750 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4751 if (scall == MAT_INITIAL_MATRIX) { 4752 /* i-array */ 4753 /*---------*/ 4754 /* post receives */ 4755 for (i=0; i<nrecvs; i++) { 4756 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4757 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4758 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4759 } 4760 4761 /* pack the outgoing message */ 4762 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4763 4764 sstartsj[0] = 0; 4765 rstartsj[0] = 0; 4766 len = 0; /* total length of j or a array to be sent */ 4767 k = 0; 4768 for (i=0; i<nsends; i++) { 4769 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4770 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4771 for (j=0; j<nrows; j++) { 4772 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4773 for (l=0; l<sbs; l++) { 4774 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4775 4776 rowlen[j*sbs+l] = ncols; 4777 4778 len += ncols; 4779 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4780 } 4781 k++; 4782 } 4783 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4784 4785 sstartsj[i+1] = len; /* starting point of 
(i+1)-th outgoing msg in bufj and bufa */ 4786 } 4787 /* recvs and sends of i-array are completed */ 4788 i = nrecvs; 4789 while (i--) { 4790 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4791 } 4792 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4793 4794 /* allocate buffers for sending j and a arrays */ 4795 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4796 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4797 4798 /* create i-array of B_oth */ 4799 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4800 4801 b_othi[0] = 0; 4802 len = 0; /* total length of j or a array to be received */ 4803 k = 0; 4804 for (i=0; i<nrecvs; i++) { 4805 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4806 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4807 for (j=0; j<nrows; j++) { 4808 b_othi[k+1] = b_othi[k] + rowlen[j]; 4809 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4810 k++; 4811 } 4812 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4813 } 4814 4815 /* allocate space for j and a arrays of B_oth */ 4816 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4817 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4818 4819 /* j-array */ 4820 /*---------*/ 4821 /* post receives of j-array */ 4822 for (i=0; i<nrecvs; i++) { 4823 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4824 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4825 } 4826 4827 /* pack the outgoing message j-array */ 4828 k = 0; 4829 for (i=0; i<nsends; i++) { 4830 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4831 bufJ = bufj+sstartsj[i]; 4832 for (j=0; j<nrows; j++) { 4833 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4834 for (ll=0; ll<sbs; ll++) { 4835 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4836 for (l=0; l<ncols; l++) { 4837 *bufJ++ = cols[l]; 4838 } 4839 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4840 } 4841 } 4842 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4843 } 4844 4845 /* recvs and sends of j-array are completed */ 4846 i = nrecvs; 4847 while (i--) { 4848 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4849 } 4850 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4851 } else if (scall == MAT_REUSE_MATRIX) { 4852 sstartsj = *startsj_s; 4853 rstartsj = *startsj_r; 4854 bufa = *bufa_ptr; 4855 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4856 b_otha = b_oth->a; 4857 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value"); 4858 4859 /* a-array */ 4860 /*---------*/ 4861 /* post receives of a-array */ 4862 for (i=0; i<nrecvs; i++) { 4863 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4864 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4865 } 4866 4867 /* pack the outgoing message a-array */ 4868 k = 0; 4869 for (i=0; i<nsends; i++) { 4870 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4871 bufA = bufa+sstartsj[i]; 4872 for (j=0; j<nrows; j++) { 4873 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4874 for (ll=0; ll<sbs; ll++) { 4875 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4876 for (l=0; l<ncols; l++) { 4877 *bufA++ = vals[l]; 4878 } 4879 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4880 } 4881 } 4882 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4883 } 4884 /* recvs and sends of a-array are completed */ 4885 i = nrecvs; 4886 while (i--) { 4887 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4888 } 4889 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4890 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4891 4892 if (scall == MAT_INITIAL_MATRIX) { 4893 /* put together the new matrix */ 4894 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4895 4896 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4897 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4898 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4899 b_oth->free_a = PETSC_TRUE; 4900 b_oth->free_ij = PETSC_TRUE; 4901 b_oth->nonew = 0; 4902 4903 ierr = PetscFree(bufj);CHKERRQ(ierr); 4904 if (!startsj_s || !bufa_ptr) { 4905 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4906 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4907 } else { 4908 *startsj_s = sstartsj; 4909 *startsj_r = rstartsj; 4910 *bufa_ptr = bufa; 4911 } 4912 } 4913 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4914 PetscFunctionReturn(0); 4915 } 4916 4917 #undef __FUNCT__ 4918 #define __FUNCT__ "MatGetCommunicationStructs" 4919 /*@C 4920 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4921 4922 Not Collective 4923 4924 Input Parameters: 4925 . A - The matrix in mpiaij format 4926 4927 Output Parameter: 4928 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4929 . 
colmap - A map from global column index to local index into lvec 4930 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4931 4932 Level: developer 4933 4934 @*/ 4935 #if defined(PETSC_USE_CTABLE) 4936 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4937 #else 4938 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4939 #endif 4940 { 4941 Mat_MPIAIJ *a; 4942 4943 PetscFunctionBegin; 4944 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4945 PetscValidPointer(lvec, 2); 4946 PetscValidPointer(colmap, 3); 4947 PetscValidPointer(multScatter, 4); 4948 a = (Mat_MPIAIJ*) A->data; 4949 if (lvec) *lvec = a->lvec; 4950 if (colmap) *colmap = a->colmap; 4951 if (multScatter) *multScatter = a->Mvctx; 4952 PetscFunctionReturn(0); 4953 } 4954 4955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4956 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4958 #if defined(PETSC_HAVE_ELEMENTAL) 4959 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4960 #endif 4961 4962 #undef __FUNCT__ 4963 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4964 /* 4965 Computes (B'*A')' since computing B*A directly is untenable 4966 4967 n p p 4968 ( ) ( ) ( ) 4969 m ( A ) * n ( B ) = m ( C ) 4970 ( ) ( ) ( ) 4971 4972 */ 4973 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4974 { 4975 PetscErrorCode ierr; 4976 Mat At,Bt,Ct; 4977 4978 PetscFunctionBegin; 4979 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4980 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4981 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4982 ierr = MatDestroy(&At);CHKERRQ(ierr); 4983 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4984 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4985 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4986 PetscFunctionReturn(0); 4987 } 4988 4989 #undef __FUNCT__ 4990 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4991 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4992 { 4993 PetscErrorCode ierr; 4994 PetscInt m=A->rmap->n,n=B->cmap->n; 4995 Mat Cmat; 4996 4997 PetscFunctionBegin; 4998 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4999 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5000 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5001 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5002 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5003 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5004 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5005 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5006 5007 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5008 5009 *C = Cmat; 5010 PetscFunctionReturn(0); 5011 } 5012 5013 /* ----------------------------------------------------------------*/ 5014 #undef __FUNCT__ 5015 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5016 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5017 { 5018 PetscErrorCode ierr; 5019 5020 PetscFunctionBegin; 5021 if (scall == MAT_INITIAL_MATRIX) { 5022 ierr = 
PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5023 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5024 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5025 } 5026 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5027 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5028 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5029 PetscFunctionReturn(0); 5030 } 5031 5032 /*MC 5033 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5034 5035 Options Database Keys: 5036 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5037 5038 Level: beginner 5039 5040 .seealso: MatCreateAIJ() 5041 M*/ 5042 5043 #undef __FUNCT__ 5044 #define __FUNCT__ "MatCreate_MPIAIJ" 5045 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5046 { 5047 Mat_MPIAIJ *b; 5048 PetscErrorCode ierr; 5049 PetscMPIInt size; 5050 5051 PetscFunctionBegin; 5052 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5053 5054 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5055 B->data = (void*)b; 5056 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5057 B->assembled = PETSC_FALSE; 5058 B->insertmode = NOT_SET_VALUES; 5059 b->size = size; 5060 5061 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5062 5063 /* build cache for off array entries formed */ 5064 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5065 5066 b->donotstash = PETSC_FALSE; 5067 b->colmap = 0; 5068 b->garray = 0; 5069 b->roworiented = PETSC_TRUE; 5070 5071 /* stuff used for matrix vector multiply */ 5072 b->lvec = NULL; 5073 b->Mvctx = NULL; 5074 5075 /* stuff for MatGetRow() */ 5076 b->rowindices = 0; 5077 b->rowvalues = 0; 5078 b->getrowactive = PETSC_FALSE; 5079 5080 /* flexible pointer used in CUSP/CUSPARSE classes */ 5081 b->spptr = NULL; 5082 5083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5094 #if defined(PETSC_HAVE_ELEMENTAL) 5095 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5096 #endif 5097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5100 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5101 PetscFunctionReturn(0); 5102 } 5103 5104 #undef __FUNCT__ 5105 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5106 /*@C 5107 MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal" 5108 and "off-diagonal" part of the matrix in CSR format. 5109 5110 Collective on MPI_Comm 5111 5112 Input Parameters: 5113 + comm - MPI communicator 5114 . m - number of local rows (Cannot be PETSC_DECIDE) 5115 . n - number of local columns; this value should be the same as the local size used in creating the 5116 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 5117 calculated if N is given). For square matrices n is almost always m. 5118 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5119 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5120 . i - row indices for "diagonal" portion of matrix 5121 . j - column indices 5122 . a - matrix values 5123 . oi - row indices for "off-diagonal" portion of matrix 5124 . oj - column indices 5125 - oa - matrix values 5126 5127 Output Parameter: 5128 . mat - the matrix 5129 5130 Level: advanced 5131 5132 Notes: 5133 The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5134 must free the arrays once the matrix has been destroyed and not before. 5135 5136 The i, j, oi, and oj indices are 0 based 5137 5138 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5139 5140 This sets local rows and cannot be used to set off-processor values. 5141 5142 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5143 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5144 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5145 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5146 keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5147 communication if it is known that only local entries will be set.
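
   Example usage:
   The following is an illustrative sketch only, not taken from a PETSc example. It assumes two MPI processes,
   each owning one row and one column of the 2x2 matrix (2 -1; -1 2), and it assumes, based on the calls to
   MatCreateSeqAIJWithArrays() in the body of this routine, that j holds column indices local to the "diagonal"
   block while oj holds global column indices.

      Mat            A;
      PetscMPIInt    rank;
      PetscInt       i[2],j[1],oi[2],oj[1];
      PetscScalar    a[1],oa[1];
      PetscErrorCode ierr;

      ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      i[0]  = 0; i[1]  = 1; j[0]  = 0;            a[0]  = 2.0;
      oi[0] = 0; oi[1] = 1; oj[0] = rank ? 0 : 1; oa[0] = -1.0;
      ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   The six arrays must remain valid until A has been destroyed with MatDestroy().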
5148 5149 .keywords: matrix, aij, compressed row, sparse, parallel 5150 5151 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5152 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5153 @*/ 5154 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5155 { 5156 PetscErrorCode ierr; 5157 Mat_MPIAIJ *maij; 5158 5159 PetscFunctionBegin; 5160 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5161 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5162 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5163 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5164 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5165 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5166 maij = (Mat_MPIAIJ*) (*mat)->data; 5167 5168 (*mat)->preallocated = PETSC_TRUE; 5169 5170 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5171 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5172 5173 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5174 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5175 5176 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5177 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5178 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5179 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5180 5181 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5182 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5183 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5184 PetscFunctionReturn(0); 5185 } 5186 5187 /* 5188 Special version for direct calls from Fortran 5189 */ 5190 #include <petsc/private/fortranimpl.h> 5191 5192 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5193 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5194 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5195 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5196 #endif 5197 5198 /* Change these macros so can be used in void function */ 5199 #undef CHKERRQ 5200 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5201 #undef SETERRQ2 5202 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5203 #undef SETERRQ3 5204 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5205 #undef SETERRQ 5206 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5207 5208 #undef __FUNCT__ 5209 #define __FUNCT__ "matsetvaluesmpiaij_" 5210 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5211 { 5212 Mat mat = *mmat; 5213 PetscInt m = *mm, n = *mn; 5214 InsertMode addv = *maddv; 5215 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5216 PetscScalar value; 5217 PetscErrorCode ierr; 5218 5219 MatCheckPreallocated(mat,1); 5220 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5221 5222 #if defined(PETSC_USE_DEBUG) 5223 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5224 #endif 5225 { 5226 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5227 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5228 PetscBool roworiented = aij->roworiented; 5229 5230 /* Some Variables required in the macro */ 5231 Mat A = aij->A; 5232 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5233 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5234 MatScalar *aa = a->a; 5235 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5236 Mat B = aij->B; 5237 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5238 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5239 MatScalar *ba = b->a; 5240 5241 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5242 PetscInt nonew = a->nonew; 5243 MatScalar *ap1,*ap2; 5244 5245 PetscFunctionBegin; 5246 for (i=0; i<m; i++) { 5247 if (im[i] < 0) continue; 5248 #if defined(PETSC_USE_DEBUG) 5249 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5250 #endif 5251 if (im[i] >= rstart && im[i] < rend) { 5252 row = im[i] - rstart; 5253 lastcol1 = -1; 5254 rp1 = aj + ai[row]; 5255 ap1 = aa + ai[row]; 5256 rmax1 = aimax[row]; 5257 nrow1 = ailen[row]; 5258 low1 = 0; 5259 high1 = nrow1; 5260 lastcol2 = -1; 5261 rp2 = bj + bi[row]; 5262 ap2 = ba + bi[row]; 5263 rmax2 = bimax[row]; 5264 nrow2 = bilen[row]; 5265 low2 = 0; 5266 high2 = nrow2; 5267 5268 for (j=0; j<n; j++) { 5269 if (roworiented) value = v[i*n+j]; 5270 else value = v[i+j*m]; 5271 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5272 if (in[j] >= cstart && in[j] < cend) { 5273 col = in[j] - cstart; 5274 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5275 } else if (in[j] < 0) continue; 5276 #if defined(PETSC_USE_DEBUG) 5277 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5278 #endif 5279 else { 5280 if (mat->was_assembled) { 5281 if (!aij->colmap) { 5282 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5283 } 5284 #if defined(PETSC_USE_CTABLE) 5285 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5286 col--; 5287 #else 5288 col = aij->colmap[in[j]] - 1; 5289 #endif 5290 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5291 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5292 col = in[j]; 5293 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5294 B = aij->B; 5295 b = (Mat_SeqAIJ*)B->data; 5296 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5297 rp2 = bj + bi[row]; 5298 ap2 = ba + bi[row]; 5299 rmax2 = bimax[row]; 5300 nrow2 = bilen[row]; 5301 low2 = 0; 5302 high2 = nrow2; 5303 bm = aij->B->rmap->n; 5304 ba = b->a; 5305 } 5306 } else col = in[j]; 5307 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5308 } 5309 } 5310 } else if (!aij->donotstash) { 5311 if (roworiented) { 5312 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5313 } else { 5314 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5315 } 5316 } 5317 } 5318 } 5319 PetscFunctionReturnVoid(); 5320 } 5321 5322
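
/*
   The man page for MatCreateMPIAIJWithSplitArrays() above recommends assembling with MatSetValues()
   rather than supplying pre-split CSR arrays. The following is a minimal illustrative sketch of that
   recommended path; it assumes two MPI processes that each own one row of the same 2x2 matrix
   (2 -1; -1 2) used in the example above, and it uses only the standard Mat creation and assembly API.

      Mat            A;
      PetscMPIInt    rank;
      PetscInt       row,cols[2];
      PetscScalar    vals[2];
      PetscErrorCode ierr;

      ierr    = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      ierr    = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr    = MatSetSizes(A,1,1,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr    = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr    = MatMPIAIJSetPreallocation(A,1,NULL,1,NULL);CHKERRQ(ierr);
      row     = rank;
      cols[0] = 0;                  cols[1] = 1;
      vals[0] = rank ? -1.0 : 2.0;  vals[1] = rank ? 2.0 : -1.0;
      ierr    = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr    = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr    = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/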