1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 9 10 #include <immintrin.h> 11 12 #if !defined(_MM_SCALE_8) 13 #define _MM_SCALE_8 8 14 #endif 15 16 #if defined(__AVX512F__) 17 /* these do not work 18 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 19 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 20 */ 21 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 22 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 23 vec_idx = _mm256_loadu_si256((__m256i const*)acolidx); \ 24 vec_vals = _mm512_loadu_pd(aval); \ 25 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 26 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 27 #elif defined(__AVX2__) && defined(__FMA__) 28 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 29 vec_vals = _mm256_loadu_pd(aval); \ 30 vec_idx = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \ 31 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 32 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 33 #endif 34 #endif /* PETSC_HAVE_IMMINTRIN_H */ 35 36 /*@C 37 MatSeqSELLSetPreallocation - For good matrix assembly performance 38 the user should preallocate the matrix storage by setting the parameter nz 39 (or the array nnz). By setting these parameters accurately, performance 40 during matrix assembly can be increased significantly. 41 42 Collective on MPI_Comm 43 44 Input Parameters: 45 + B - The matrix 46 . nz - number of nonzeros per row (same for all rows) 47 - nnz - array containing the number of nonzeros in the various rows 48 (possibly different for each row) or NULL 49 50 Notes: 51 If nnz is given then nz is ignored. 52 53 Specify the preallocated storage with either nz or nnz (not both). 54 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 55 allocation. For large problems you MUST preallocate memory or you 56 will get TERRIBLE performance, see the users' manual chapter on matrices. 57 58 You can call MatGetInfo() to get information on how effective the preallocation was; 59 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 60 You can also run with the option -info and look for messages with the string 61 malloc in them to see if additional memory allocation was needed. 62 63 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 64 entries or columns indices. 65 66 The maximum number of nonzeos in any row should be as accurate as possible. 67 If it is underestimated, you will get bad performance due to reallocation 68 (MatSeqXSELLReallocateSELL). 69 70 Level: intermediate 71 72 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 73 74 @*/ 75 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 76 { 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 81 PetscValidType(B,1); 82 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 87 { 88 Mat_SeqSELL *b; 89 PetscInt i,j,totalslices; 90 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 95 if (maxallocrow == MAT_SKIP_ALLOCATION) { 96 skipallocation = PETSC_TRUE; 97 maxallocrow = 0; 98 } 99 100 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 101 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 102 103 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 104 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 105 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 106 if (rlen) { 107 for (i=0; i<B->rmap->n; i++) { 108 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 109 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 110 } 111 } 112 113 B->preallocated = PETSC_TRUE; 114 115 b = (Mat_SeqSELL*)B->data; 116 117 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 118 b->totalslices = totalslices; 119 if (!skipallocation) { 120 if (B->rmap->n & 0x07) {ierr = PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);CHKERRQ(ierr);} 121 122 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 123 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 124 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 125 } 126 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 127 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 128 else if (maxallocrow < 0) maxallocrow = 1; 129 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 130 } else { 131 maxallocrow = 0; 132 b->sliidx[0] = 0; 133 for (i=1; i<totalslices; i++) { 134 b->sliidx[i] = 0; 135 for (j=0;j<8;j++) { 136 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 137 } 138 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 139 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 140 } 141 /* last slice */ 142 b->sliidx[totalslices] = 0; 143 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 144 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 145 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 146 } 147 148 /* allocate space for val, colidx, rlen */ 149 /* FIXME: should B's old memory be unlogged? */ 150 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 151 /* FIXME: assuming an element of the bit array takes 8 bits */ 152 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 153 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 154 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 155 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 156 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 157 158 b->singlemalloc = PETSC_TRUE; 159 b->free_val = PETSC_TRUE; 160 b->free_colidx = PETSC_TRUE; 161 } else { 162 b->free_val = PETSC_FALSE; 163 b->free_colidx = PETSC_FALSE; 164 } 165 166 b->nz = 0; 167 b->maxallocrow = maxallocrow; 168 b->rlenmax = maxallocrow; 169 b->maxallocmat = b->sliidx[totalslices]; 170 B->info.nz_unneeded = (double)b->maxallocmat; 171 if (realalloc) { 172 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 173 } 174 PetscFunctionReturn(0); 175 } 176 177 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 178 { 179 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data; 180 PetscInt shift; 181 182 PetscFunctionBegin; 183 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 184 if (nz) *nz = a->rlen[row]; 185 shift = a->sliidx[row>>3]+(row&0x07); 186 if (!a->getrowcols) { 187 PetscErrorCode ierr; 188 189 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr); 190 } 191 if (idx) { 192 PetscInt j; 193 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j]; 194 *idx = a->getrowcols; 195 } 196 if (v) { 197 PetscInt j; 198 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j]; 199 *v = a->getrowvals; 200 } 201 PetscFunctionReturn(0); 202 } 203 204 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 205 { 206 PetscFunctionBegin; 207 PetscFunctionReturn(0); 208 } 209 210 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 211 { 212 Mat B; 213 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 214 PetscInt i; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (reuse == MAT_REUSE_MATRIX) { 219 B = *newmat; 220 ierr = MatZeroEntries(B);CHKERRQ(ierr); 221 } else { 222 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 223 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 224 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 226 } 227 228 for (i=0; i<A->rmap->n; i++) { 229 PetscInt nz = 0,*cols = NULL; 230 PetscScalar *vals = NULL; 231 232 ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 233 ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 234 ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 235 } 236 237 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 238 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 B->rmap->bs = A->rmap->bs; 240 241 if (reuse == MAT_INPLACE_MATRIX) { 242 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 243 } else { 244 *newmat = B; 245 } 246 PetscFunctionReturn(0); 247 } 248 249 #include <../src/mat/impls/aij/seq/aij.h> 250 251 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 252 { 253 Mat B; 254 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 255 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 256 const PetscInt *cols; 257 const PetscScalar *vals; 258 PetscErrorCode ierr; 259 260 PetscFunctionBegin; 261 262 if (reuse == MAT_REUSE_MATRIX) { 263 B = *newmat; 264 } else { 265 /* Can we just use ilen? */ 266 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 267 for (i=0; i<m; i++) { 268 rowlengths[i] = ai[i+1] - ai[i]; 269 } 270 271 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 272 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 273 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 274 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 275 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 276 } 277 278 for (row=0; row<m; row++) { 279 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 280 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 281 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 282 } 283 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 284 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 285 B->rmap->bs = A->rmap->bs; 286 287 if (reuse == MAT_INPLACE_MATRIX) { 288 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 289 } else { 290 *newmat = B; 291 } 292 PetscFunctionReturn(0); 293 } 294 295 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 296 { 297 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 298 PetscScalar *y; 299 const PetscScalar *x; 300 const MatScalar *aval=a->val; 301 PetscInt totalslices=a->totalslices; 302 const PetscInt *acolidx=a->colidx; 303 PetscInt i,j; 304 PetscErrorCode ierr; 305 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 306 __m512d vec_x,vec_y,vec_vals; 307 __m256i vec_idx; 308 __mmask8 mask; 309 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 310 __m256i vec_idx2,vec_idx3,vec_idx4; 311 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 312 __m128i vec_idx; 313 __m256d vec_x,vec_y,vec_y2,vec_vals; 314 MatScalar yval; 315 PetscInt r,rows_left,row,nnz_in_row; 316 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 317 __m128d vec_x_tmp; 318 __m256d vec_x,vec_y,vec_y2,vec_vals; 319 MatScalar yval; 320 PetscInt r,rows_left,row,nnz_in_row; 321 #else 322 PetscScalar sum[8]; 323 #endif 324 325 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 326 #pragma disjoint(*x,*y,*aval) 327 #endif 328 329 PetscFunctionBegin; 330 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 331 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 332 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 333 for (i=0; i<totalslices; i++) { /* loop over slices */ 334 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 335 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 336 337 vec_y = _mm512_setzero_pd(); 338 vec_y2 = _mm512_setzero_pd(); 339 vec_y3 = _mm512_setzero_pd(); 340 vec_y4 = _mm512_setzero_pd(); 341 342 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 343 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 344 case 3: 345 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 346 acolidx += 8; aval += 8; 347 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 348 acolidx += 8; aval += 8; 349 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 350 acolidx += 8; aval += 8; 351 j += 3; 352 break; 353 case 2: 354 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 355 acolidx += 8; aval += 8; 356 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 357 acolidx += 8; aval += 8; 358 j += 2; 359 break; 360 case 1: 361 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 362 acolidx += 8; aval += 8; 363 j += 1; 364 break; 365 } 366 #pragma novector 367 for (; j<(a->sliidx[i+1]>>3); j+=4) { 368 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 369 acolidx += 8; aval += 8; 370 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 371 acolidx += 8; aval += 8; 372 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 373 acolidx += 8; aval += 8; 374 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 375 acolidx += 8; aval += 8; 376 } 377 378 vec_y = _mm512_add_pd(vec_y,vec_y2); 379 vec_y = _mm512_add_pd(vec_y,vec_y3); 380 vec_y = _mm512_add_pd(vec_y,vec_y4); 381 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 382 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 383 _mm512_mask_storeu_pd(&y[8*i],mask,vec_y); 384 } else { 385 _mm512_storeu_pd(&y[8*i],vec_y); 386 } 387 } 388 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 389 for (i=0; i<totalslices; i++) { /* loop over full slices */ 390 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 391 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 392 393 /* last slice may have padding rows. Don't use vectorization. */ 394 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 395 rows_left = A->rmap->n - 8*i; 396 for (r=0; r<rows_left; ++r) { 397 yval = (MatScalar)0; 398 row = 8*i + r; 399 nnz_in_row = a->rlen[row]; 400 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 401 y[row] = yval; 402 } 403 break; 404 } 405 406 vec_y = _mm256_setzero_pd(); 407 vec_y2 = _mm256_setzero_pd(); 408 409 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 410 #pragma novector 411 #pragma unroll(2) 412 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 413 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 414 aval += 4; acolidx += 4; 415 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 416 aval += 4; acolidx += 4; 417 } 418 419 _mm256_storeu_pd(y+i*8,vec_y); 420 _mm256_storeu_pd(y+i*8+4,vec_y2); 421 } 422 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 423 for (i=0; i<totalslices; i++) { /* loop over full slices */ 424 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 425 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 426 427 vec_y = _mm256_setzero_pd(); 428 vec_y2 = _mm256_setzero_pd(); 429 430 /* last slice may have padding rows. Don't use vectorization. */ 431 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 432 rows_left = A->rmap->n - 8*i; 433 for (r=0; r<rows_left; ++r) { 434 yval = (MatScalar)0; 435 row = 8*i + r; 436 nnz_in_row = a->rlen[row]; 437 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 438 y[row] = yval; 439 } 440 break; 441 } 442 443 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 444 #pragma novector 445 #pragma unroll(2) 446 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 447 vec_vals = _mm256_loadu_pd(aval); 448 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 449 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 450 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 451 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 452 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 453 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 454 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 455 aval += 4; 456 457 vec_vals = _mm256_loadu_pd(aval); 458 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 459 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 460 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 461 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 462 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 463 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 464 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 465 aval += 4; 466 } 467 468 _mm256_storeu_pd(y + i*8, vec_y); 469 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 470 } 471 #else 472 for (i=0; i<totalslices; i++) { /* loop over slices */ 473 for (j=0; j<8; j++) sum[j] = 0.0; 474 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 475 sum[0] += aval[j] * x[acolidx[j]]; 476 sum[1] += aval[j+1] * x[acolidx[j+1]]; 477 sum[2] += aval[j+2] * x[acolidx[j+2]]; 478 sum[3] += aval[j+3] * x[acolidx[j+3]]; 479 sum[4] += aval[j+4] * x[acolidx[j+4]]; 480 sum[5] += aval[j+5] * x[acolidx[j+5]]; 481 sum[6] += aval[j+6] * x[acolidx[j+6]]; 482 sum[7] += aval[j+7] * x[acolidx[j+7]]; 483 } 484 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 485 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 486 } else { 487 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 488 } 489 } 490 #endif 491 492 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 493 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 494 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 499 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 500 { 501 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 502 PetscScalar *y,*z; 503 const PetscScalar *x; 504 const MatScalar *aval=a->val; 505 PetscInt totalslices=a->totalslices; 506 const PetscInt *acolidx=a->colidx; 507 PetscInt i,j; 508 PetscErrorCode ierr; 509 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 510 __m512d vec_x,vec_y,vec_vals; 511 __m256i vec_idx; 512 __mmask8 mask; 513 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 514 __m256i vec_idx2,vec_idx3,vec_idx4; 515 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 516 __m128d vec_x_tmp; 517 __m256d vec_x,vec_y,vec_y2,vec_vals; 518 MatScalar yval; 519 PetscInt r,row,nnz_in_row; 520 #else 521 PetscScalar sum[8]; 522 #endif 523 524 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 525 #pragma disjoint(*x,*y,*aval) 526 #endif 527 528 PetscFunctionBegin; 529 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 530 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 531 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 532 for (i=0; i<totalslices; i++) { /* loop over slices */ 533 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 534 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 535 536 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 537 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 538 vec_y = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]); 539 } else { 540 vec_y = _mm512_loadu_pd(&y[8*i]); 541 } 542 vec_y2 = _mm512_setzero_pd(); 543 vec_y3 = _mm512_setzero_pd(); 544 vec_y4 = _mm512_setzero_pd(); 545 546 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 547 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 548 case 3: 549 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 550 acolidx += 8; aval += 8; 551 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 552 acolidx += 8; aval += 8; 553 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 554 acolidx += 8; aval += 8; 555 j += 3; 556 break; 557 case 2: 558 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 559 acolidx += 8; aval += 8; 560 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 561 acolidx += 8; aval += 8; 562 j += 2; 563 break; 564 case 1: 565 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 566 acolidx += 8; aval += 8; 567 j += 1; 568 break; 569 } 570 #pragma novector 571 for (; j<(a->sliidx[i+1]>>3); j+=4) { 572 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 573 acolidx += 8; aval += 8; 574 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 575 acolidx += 8; aval += 8; 576 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 577 acolidx += 8; aval += 8; 578 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 579 acolidx += 8; aval += 8; 580 } 581 582 vec_y = _mm512_add_pd(vec_y,vec_y2); 583 vec_y = _mm512_add_pd(vec_y,vec_y3); 584 vec_y = _mm512_add_pd(vec_y,vec_y4); 585 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 586 _mm512_mask_storeu_pd(&z[8*i],mask,vec_y); 587 } else { 588 _mm512_storeu_pd(&z[8*i],vec_y); 589 } 590 } 591 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 592 for (i=0; i<totalslices; i++) { /* loop over full slices */ 593 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 594 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 595 596 /* last slice may have padding rows. Don't use vectorization. */ 597 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 598 for (r=0; r<(A->rmap->n & 0x07); ++r) { 599 row = 8*i + r; 600 yval = (MatScalar)0.0; 601 nnz_in_row = a->rlen[row]; 602 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 603 z[row] = y[row] + yval; 604 } 605 break; 606 } 607 608 vec_y = _mm256_loadu_pd(y+8*i); 609 vec_y2 = _mm256_loadu_pd(y+8*i+4); 610 611 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 612 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 613 vec_vals = _mm256_loadu_pd(aval); 614 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 615 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 616 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 617 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 618 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 619 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 620 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 621 aval += 4; 622 623 vec_vals = _mm256_loadu_pd(aval); 624 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 625 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 626 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 627 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 628 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 629 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 630 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 631 aval += 4; 632 } 633 634 _mm256_storeu_pd(z+i*8,vec_y); 635 _mm256_storeu_pd(z+i*8+4,vec_y2); 636 } 637 #else 638 for (i=0; i<totalslices; i++) { /* loop over slices */ 639 for (j=0; j<8; j++) sum[j] = 0.0; 640 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 641 sum[0] += aval[j] * x[acolidx[j]]; 642 sum[1] += aval[j+1] * x[acolidx[j+1]]; 643 sum[2] += aval[j+2] * x[acolidx[j+2]]; 644 sum[3] += aval[j+3] * x[acolidx[j+3]]; 645 sum[4] += aval[j+4] * x[acolidx[j+4]]; 646 sum[5] += aval[j+5] * x[acolidx[j+5]]; 647 sum[6] += aval[j+6] * x[acolidx[j+6]]; 648 sum[7] += aval[j+7] * x[acolidx[j+7]]; 649 } 650 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 651 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 652 } else { 653 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 654 } 655 } 656 #endif 657 658 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 659 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 660 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 665 { 666 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 667 PetscScalar *y; 668 const PetscScalar *x; 669 const MatScalar *aval=a->val; 670 const PetscInt *acolidx=a->colidx; 671 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 672 PetscErrorCode ierr; 673 674 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 675 #pragma disjoint(*x,*y,*aval) 676 #endif 677 678 PetscFunctionBegin; 679 if (A->symmetric) { 680 ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 684 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 685 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 686 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 687 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 688 for (r=0; r<(A->rmap->n & 0x07); ++r) { 689 row = 8*i + r; 690 nnz_in_row = a->rlen[row]; 691 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 692 } 693 break; 694 } 695 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 696 y[acolidx[j]] += aval[j] * x[8*i]; 697 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 698 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 699 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 700 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 701 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 702 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 703 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 704 } 705 } 706 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 707 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 708 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 713 { 714 PetscErrorCode ierr; 715 716 PetscFunctionBegin; 717 if (A->symmetric) { 718 ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr); 719 } else { 720 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 721 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 722 } 723 PetscFunctionReturn(0); 724 } 725 726 /* 727 Checks for missing diagonals 728 */ 729 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 730 { 731 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 732 PetscInt *diag,i; 733 PetscErrorCode ierr; 734 735 PetscFunctionBegin; 736 *missing = PETSC_FALSE; 737 if (A->rmap->n > 0 && !(a->colidx)) { 738 *missing = PETSC_TRUE; 739 if (d) *d = 0; 740 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 741 } else { 742 diag = a->diag; 743 for (i=0; i<A->rmap->n; i++) { 744 if (diag[i] == -1) { 745 *missing = PETSC_TRUE; 746 if (d) *d = i; 747 ierr = PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);CHKERRQ(ierr); 748 break; 749 } 750 } 751 } 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 756 { 757 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 758 PetscInt i,j,m=A->rmap->n,shift; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 if (!a->diag) { 763 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 764 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 765 a->free_diag = PETSC_TRUE; 766 } 767 for (i=0; i<m; i++) { /* loop over rows */ 768 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 769 a->diag[i] = -1; 770 for (j=0; j<a->rlen[i]; j++) { 771 if (a->colidx[shift+j*8] == i) { 772 a->diag[i] = shift+j*8; 773 break; 774 } 775 } 776 } 777 PetscFunctionReturn(0); 778 } 779 780 /* 781 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 782 */ 783 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 784 { 785 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 786 PetscInt i,*diag,m = A->rmap->n; 787 MatScalar *val = a->val; 788 PetscScalar *idiag,*mdiag; 789 PetscErrorCode ierr; 790 791 PetscFunctionBegin; 792 if (a->idiagvalid) PetscFunctionReturn(0); 793 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 794 diag = a->diag; 795 if (!a->idiag) { 796 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 797 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 798 val = a->val; 799 } 800 mdiag = a->mdiag; 801 idiag = a->idiag; 802 803 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 804 for (i=0; i<m; i++) { 805 mdiag[i] = val[diag[i]]; 806 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 807 if (PetscRealPart(fshift)) { 808 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 809 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 810 A->factorerror_zeropivot_value = 0.0; 811 A->factorerror_zeropivot_row = i; 812 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 813 } 814 idiag[i] = 1.0/val[diag[i]]; 815 } 816 ierr = PetscLogFlops(m);CHKERRQ(ierr); 817 } else { 818 for (i=0; i<m; i++) { 819 mdiag[i] = val[diag[i]]; 820 idiag[i] = omega/(fshift + val[diag[i]]); 821 } 822 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 823 } 824 a->idiagvalid = PETSC_TRUE; 825 PetscFunctionReturn(0); 826 } 827 828 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 829 { 830 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 831 PetscErrorCode ierr; 832 833 PetscFunctionBegin; 834 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 835 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 836 PetscFunctionReturn(0); 837 } 838 839 PetscErrorCode MatDestroy_SeqSELL(Mat A) 840 { 841 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 842 PetscErrorCode ierr; 843 844 PetscFunctionBegin; 845 #if defined(PETSC_USE_LOG) 846 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 847 #endif 848 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 849 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 850 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 851 ierr = PetscFree(a->diag);CHKERRQ(ierr); 852 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 853 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 854 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 855 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 856 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 857 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 858 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr); 859 860 ierr = PetscFree(A->data);CHKERRQ(ierr); 861 862 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 865 #if defined(PETSC_HAVE_ELEMENTAL) 866 #endif 867 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 872 { 873 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 switch (op) { 878 case MAT_ROW_ORIENTED: 879 a->roworiented = flg; 880 break; 881 case MAT_KEEP_NONZERO_PATTERN: 882 a->keepnonzeropattern = flg; 883 break; 884 case MAT_NEW_NONZERO_LOCATIONS: 885 a->nonew = (flg ? 0 : 1); 886 break; 887 case MAT_NEW_NONZERO_LOCATION_ERR: 888 a->nonew = (flg ? -1 : 0); 889 break; 890 case MAT_NEW_NONZERO_ALLOCATION_ERR: 891 a->nonew = (flg ? -2 : 0); 892 break; 893 case MAT_UNUSED_NONZERO_LOCATION_ERR: 894 a->nounused = (flg ? -1 : 0); 895 break; 896 case MAT_NEW_DIAGONALS: 897 case MAT_IGNORE_OFF_PROC_ENTRIES: 898 case MAT_USE_HASH_TABLE: 899 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 900 break; 901 case MAT_SPD: 902 case MAT_SYMMETRIC: 903 case MAT_STRUCTURALLY_SYMMETRIC: 904 case MAT_HERMITIAN: 905 case MAT_SYMMETRY_ETERNAL: 906 /* These options are handled directly by MatSetOption() */ 907 break; 908 default: 909 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 910 } 911 PetscFunctionReturn(0); 912 } 913 914 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 915 { 916 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 917 PetscInt i,j,n,shift; 918 PetscScalar *x,zero=0.0; 919 PetscErrorCode ierr; 920 921 PetscFunctionBegin; 922 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 923 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 924 925 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 926 PetscInt *diag=a->diag; 927 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 928 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 929 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 930 PetscFunctionReturn(0); 931 } 932 933 ierr = VecSet(v,zero);CHKERRQ(ierr); 934 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 935 for (i=0; i<n; i++) { /* loop over rows */ 936 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 937 x[i] = 0; 938 for (j=0; j<a->rlen[i]; j++) { 939 if (a->colidx[shift+j*8] == i) { 940 x[i] = a->val[shift+j*8]; 941 break; 942 } 943 } 944 } 945 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 950 { 951 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 952 const PetscScalar *l,*r; 953 PetscInt i,j,m,n,row; 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 if (ll) { 958 /* The local size is used so that VecMPI can be passed to this routine 959 by MatDiagonalScale_MPISELL */ 960 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 961 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 962 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 963 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 964 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 965 a->val[j] *= l[8*i+row]; 966 } 967 } 968 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 969 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 970 } 971 if (rr) { 972 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 973 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 974 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 975 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 976 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 977 a->val[j] *= r[a->colidx[j]]; 978 } 979 } 980 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 981 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 982 } 983 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 988 989 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 990 { 991 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 992 PetscInt *cp,i,k,low,high,t,row,col,l; 993 PetscInt shift; 994 MatScalar *vp; 995 996 PetscFunctionBegin; 997 for (k=0; k<m; k++) { /* loop over requested rows */ 998 row = im[k]; 999 if (row<0) continue; 1000 #if defined(PETSC_USE_DEBUG) 1001 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1002 #endif 1003 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1004 cp = a->colidx+shift; /* pointer to the row */ 1005 vp = a->val+shift; /* pointer to the row */ 1006 for (l=0; l<n; l++) { /* loop over requested columns */ 1007 col = in[l]; 1008 if (col<0) continue; 1009 #if defined(PETSC_USE_DEBUG) 1010 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 1011 #endif 1012 high = a->rlen[row]; low = 0; /* assume unsorted */ 1013 while (high-low > 5) { 1014 t = (low+high)/2; 1015 if (*(cp+t*8) > col) high = t; 1016 else low = t; 1017 } 1018 for (i=low; i<high; i++) { 1019 if (*(cp+8*i) > col) break; 1020 if (*(cp+8*i) == col) { 1021 *v++ = *(vp+8*i); 1022 goto finished; 1023 } 1024 } 1025 *v++ = 0.0; 1026 finished:; 1027 } 1028 } 1029 PetscFunctionReturn(0); 1030 } 1031 1032 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 1033 { 1034 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1035 PetscInt i,j,m=A->rmap->n,shift; 1036 const char *name; 1037 PetscViewerFormat format; 1038 PetscErrorCode ierr; 1039 1040 PetscFunctionBegin; 1041 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1042 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1043 PetscInt nofinalvalue = 0; 1044 /* 1045 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1046 nofinalvalue = 1; 1047 } 1048 */ 1049 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1050 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 1051 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 1052 #if defined(PETSC_USE_COMPLEX) 1053 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1054 #else 1055 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1056 #endif 1057 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1058 1059 for (i=0; i<m; i++) { 1060 shift = a->sliidx[i>>3]+(i&0x07); 1061 for (j=0; j<a->rlen[i]; j++) { 1062 #if defined(PETSC_USE_COMPLEX) 1063 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1064 #else 1065 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1066 #endif 1067 } 1068 } 1069 /* 1070 if (nofinalvalue) { 1071 #if defined(PETSC_USE_COMPLEX) 1072 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1073 #else 1074 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1075 #endif 1076 } 1077 */ 1078 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1079 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1081 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1082 PetscFunctionReturn(0); 1083 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1084 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1085 for (i=0; i<m; i++) { 1086 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1087 shift = a->sliidx[i>>3]+(i&0x07); 1088 for (j=0; j<a->rlen[i]; j++) { 1089 #if defined(PETSC_USE_COMPLEX) 1090 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1091 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1092 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1093 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1094 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1095 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1096 } 1097 #else 1098 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1099 #endif 1100 } 1101 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1102 } 1103 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1104 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1105 PetscInt cnt=0,jcnt; 1106 PetscScalar value; 1107 #if defined(PETSC_USE_COMPLEX) 1108 PetscBool realonly=PETSC_TRUE; 1109 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1110 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1111 realonly = PETSC_FALSE; 1112 break; 1113 } 1114 } 1115 #endif 1116 1117 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1118 for (i=0; i<m; i++) { 1119 jcnt = 0; 1120 shift = a->sliidx[i>>3]+(i&0x07); 1121 for (j=0; j<A->cmap->n; j++) { 1122 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1123 value = a->val[cnt++]; 1124 jcnt++; 1125 } else { 1126 value = 0.0; 1127 } 1128 #if defined(PETSC_USE_COMPLEX) 1129 if (realonly) { 1130 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1131 } else { 1132 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1133 } 1134 #else 1135 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1136 #endif 1137 } 1138 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1139 } 1140 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1141 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1142 PetscInt fshift=1; 1143 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1144 #if defined(PETSC_USE_COMPLEX) 1145 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1146 #else 1147 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1148 #endif 1149 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1150 for (i=0; i<m; i++) { 1151 shift = a->sliidx[i>>3]+(i&0x07); 1152 for (j=0; j<a->rlen[i]; j++) { 1153 #if defined(PETSC_USE_COMPLEX) 1154 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1155 #else 1156 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1157 #endif 1158 } 1159 } 1160 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1161 } else if (format == PETSC_VIEWER_NATIVE) { 1162 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 1163 PetscInt row; 1164 ierr = PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr); 1165 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 1166 #if defined(PETSC_USE_COMPLEX) 1167 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1168 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1169 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1170 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1171 } else { 1172 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1173 } 1174 #else 1175 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1176 #endif 1177 } 1178 } 1179 } else { 1180 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1181 if (A->factortype) { 1182 for (i=0; i<m; i++) { 1183 shift = a->sliidx[i>>3]+(i&0x07); 1184 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1185 /* L part */ 1186 for (j=shift; j<a->diag[i]; j+=8) { 1187 #if defined(PETSC_USE_COMPLEX) 1188 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1189 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1190 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1191 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1192 } else { 1193 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1194 } 1195 #else 1196 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1197 #endif 1198 } 1199 /* diagonal */ 1200 j = a->diag[i]; 1201 #if defined(PETSC_USE_COMPLEX) 1202 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1203 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1204 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1205 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1206 } else { 1207 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1208 } 1209 #else 1210 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1211 #endif 1212 1213 /* U part */ 1214 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1215 #if defined(PETSC_USE_COMPLEX) 1216 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1217 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1218 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1219 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1220 } else { 1221 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1222 } 1223 #else 1224 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1225 #endif 1226 } 1227 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1228 } 1229 } else { 1230 for (i=0; i<m; i++) { 1231 shift = a->sliidx[i>>3]+(i&0x07); 1232 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1233 for (j=0; j<a->rlen[i]; j++) { 1234 #if defined(PETSC_USE_COMPLEX) 1235 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1236 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1237 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1238 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1241 } 1242 #else 1243 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1244 #endif 1245 } 1246 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1247 } 1248 } 1249 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1250 } 1251 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #include <petscdraw.h> 1256 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1257 { 1258 Mat A=(Mat)Aa; 1259 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1260 PetscInt i,j,m=A->rmap->n,shift; 1261 int color; 1262 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1263 PetscViewer viewer; 1264 PetscViewerFormat format; 1265 PetscErrorCode ierr; 1266 1267 PetscFunctionBegin; 1268 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1269 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1270 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1271 1272 /* loop over matrix elements drawing boxes */ 1273 1274 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1275 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1276 /* Blue for negative, Cyan for zero and Red for positive */ 1277 color = PETSC_DRAW_BLUE; 1278 for (i=0; i<m; i++) { 1279 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1280 y_l = m - i - 1.0; y_r = y_l + 1.0; 1281 for (j=0; j<a->rlen[i]; j++) { 1282 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1283 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1284 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1285 } 1286 } 1287 color = PETSC_DRAW_CYAN; 1288 for (i=0; i<m; i++) { 1289 shift = a->sliidx[i>>3]+(i&0x07); 1290 y_l = m - i - 1.0; y_r = y_l + 1.0; 1291 for (j=0; j<a->rlen[i]; j++) { 1292 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1293 if (a->val[shift+8*j] != 0.) continue; 1294 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1295 } 1296 } 1297 color = PETSC_DRAW_RED; 1298 for (i=0; i<m; i++) { 1299 shift = a->sliidx[i>>3]+(i&0x07); 1300 y_l = m - i - 1.0; y_r = y_l + 1.0; 1301 for (j=0; j<a->rlen[i]; j++) { 1302 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1303 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1304 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1305 } 1306 } 1307 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1308 } else { 1309 /* use contour shading to indicate magnitude of values */ 1310 /* first determine max of all nonzero values */ 1311 PetscReal minv=0.0,maxv=0.0; 1312 PetscInt count=0; 1313 PetscDraw popup; 1314 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1315 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1316 } 1317 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1318 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1319 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1320 1321 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1322 for (i=0; i<m; i++) { 1323 shift = a->sliidx[i>>3]+(i&0x07); 1324 y_l = m - i - 1.0; 1325 y_r = y_l + 1.0; 1326 for (j=0; j<a->rlen[i]; j++) { 1327 x_l = a->colidx[shift+j*8]; 1328 x_r = x_l + 1.0; 1329 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1330 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1331 count++; 1332 } 1333 } 1334 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1335 } 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #include <petscdraw.h> 1340 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1341 { 1342 PetscDraw draw; 1343 PetscReal xr,yr,xl,yl,h,w; 1344 PetscBool isnull; 1345 PetscErrorCode ierr; 1346 1347 PetscFunctionBegin; 1348 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1349 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1350 if (isnull) PetscFunctionReturn(0); 1351 1352 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1353 xr += w; yr += h; xl = -w; yl = -h; 1354 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1355 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1356 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1357 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1358 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 1362 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1363 { 1364 PetscBool iascii,isbinary,isdraw; 1365 PetscErrorCode ierr; 1366 1367 PetscFunctionBegin; 1368 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1369 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1370 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1371 if (iascii) { 1372 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1373 } else if (isbinary) { 1374 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1375 } else if (isdraw) { 1376 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1377 } 1378 PetscFunctionReturn(0); 1379 } 1380 1381 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1382 { 1383 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1384 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1385 MatScalar *vp; 1386 PetscErrorCode ierr; 1387 1388 PetscFunctionBegin; 1389 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1390 /* To do: compress out the unused elements */ 1391 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1392 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1393 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1394 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1395 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1396 for (i=0; i<a->totalslices; ++i) { 1397 shift = a->sliidx[i]; /* starting index of the slice */ 1398 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1399 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1400 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1401 row = 8*i + row_in_slice; 1402 nrow = a->rlen[row]; /* number of nonzeros in row */ 1403 /* 1404 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1405 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1406 */ 1407 lastcol = 0; 1408 if (nrow>0) { /* nonempty row */ 1409 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1410 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1411 for (j=1;j<8;j++) { 1412 if (a->rlen[8*i+j]) { 1413 lastcol = cp[j]; 1414 break; 1415 } 1416 } 1417 } else { 1418 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1419 } 1420 1421 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1422 cp[8*k+row_in_slice] = lastcol; 1423 vp[8*k+row_in_slice] = (MatScalar)0; 1424 } 1425 } 1426 } 1427 1428 A->info.mallocs += a->reallocs; 1429 a->reallocs = 0; 1430 1431 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 1435 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1436 { 1437 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1438 1439 PetscFunctionBegin; 1440 info->block_size = 1.0; 1441 info->nz_allocated = (double)a->maxallocmat; 1442 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1443 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1444 info->assemblies = (double)A->num_ass; 1445 info->mallocs = (double)A->info.mallocs; 1446 info->memory = ((PetscObject)A)->mem; 1447 if (A->factortype) { 1448 info->fill_ratio_given = A->info.fill_ratio_given; 1449 info->fill_ratio_needed = A->info.fill_ratio_needed; 1450 info->factor_mallocs = A->info.factor_mallocs; 1451 } else { 1452 info->fill_ratio_given = 0; 1453 info->fill_ratio_needed = 0; 1454 info->factor_mallocs = 0; 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1460 { 1461 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1462 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1463 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1464 MatScalar *vp,value; 1465 PetscErrorCode ierr; 1466 1467 PetscFunctionBegin; 1468 for (k=0; k<m; k++) { /* loop over added rows */ 1469 row = im[k]; 1470 if (row < 0) continue; 1471 #if defined(PETSC_USE_DEBUG) 1472 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1473 #endif 1474 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1475 cp = a->colidx+shift; /* pointer to the row */ 1476 vp = a->val+shift; /* pointer to the row */ 1477 nrow = a->rlen[row]; 1478 low = 0; 1479 high = nrow; 1480 1481 for (l=0; l<n; l++) { /* loop over added columns */ 1482 col = in[l]; 1483 if (col<0) continue; 1484 #if defined(PETSC_USE_DEBUG) 1485 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1486 #endif 1487 if (a->roworiented) { 1488 value = v[l+k*n]; 1489 } else { 1490 value = v[k+l*m]; 1491 } 1492 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1493 1494 /* search in this row for the specified colmun, i indicates the column to be set */ 1495 if (col <= lastcol) low = 0; 1496 else high = nrow; 1497 lastcol = col; 1498 while (high-low > 5) { 1499 t = (low+high)/2; 1500 if (*(cp+t*8) > col) high = t; 1501 else low = t; 1502 } 1503 for (i=low; i<high; i++) { 1504 if (*(cp+i*8) > col) break; 1505 if (*(cp+i*8) == col) { 1506 if (is == ADD_VALUES) *(vp+i*8) += value; 1507 else *(vp+i*8) = value; 1508 low = i + 1; 1509 goto noinsert; 1510 } 1511 } 1512 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1513 if (nonew == 1) goto noinsert; 1514 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1515 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1516 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1517 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1518 for (ii=nrow-1; ii>=i; ii--) { 1519 *(cp+(ii+1)*8) = *(cp+ii*8); 1520 *(vp+(ii+1)*8) = *(vp+ii*8); 1521 } 1522 a->rlen[row]++; 1523 *(cp+i*8) = col; 1524 *(vp+i*8) = value; 1525 a->nz++; 1526 A->nonzerostate++; 1527 low = i+1; high++; nrow++; 1528 noinsert:; 1529 } 1530 a->rlen[row] = nrow; 1531 } 1532 PetscFunctionReturn(0); 1533 } 1534 1535 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1536 { 1537 PetscErrorCode ierr; 1538 1539 PetscFunctionBegin; 1540 /* If the two matrices have the same copy implementation, use fast copy. */ 1541 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1542 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1543 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1544 1545 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1546 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1547 } else { 1548 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1549 } 1550 PetscFunctionReturn(0); 1551 } 1552 1553 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1554 { 1555 PetscErrorCode ierr; 1556 1557 PetscFunctionBegin; 1558 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1563 { 1564 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1565 1566 PetscFunctionBegin; 1567 *array = a->val; 1568 PetscFunctionReturn(0); 1569 } 1570 1571 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1572 { 1573 PetscFunctionBegin; 1574 PetscFunctionReturn(0); 1575 } 1576 1577 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1578 { 1579 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1580 PetscInt i; 1581 MatScalar *aval=a->val; 1582 1583 PetscFunctionBegin; 1584 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1585 PetscFunctionReturn(0); 1586 } 1587 1588 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1589 { 1590 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1591 PetscInt i; 1592 MatScalar *aval=a->val; 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1597 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1602 { 1603 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1604 MatScalar *aval=a->val; 1605 PetscScalar oalpha=alpha; 1606 PetscBLASInt one=1,size; 1607 PetscErrorCode ierr; 1608 1609 PetscFunctionBegin; 1610 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1611 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1612 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1613 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1618 { 1619 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1620 PetscErrorCode ierr; 1621 1622 PetscFunctionBegin; 1623 if (!Y->preallocated || !y->nz) { 1624 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1625 } 1626 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1631 { 1632 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1633 PetscScalar *x,sum,*t; 1634 const MatScalar *idiag=0,*mdiag; 1635 const PetscScalar *b,*xb; 1636 PetscInt n,m=A->rmap->n,i,j,shift; 1637 const PetscInt *diag; 1638 PetscErrorCode ierr; 1639 1640 PetscFunctionBegin; 1641 its = its*lits; 1642 1643 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1644 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1645 a->fshift = fshift; 1646 a->omega = omega; 1647 1648 diag = a->diag; 1649 t = a->ssor_work; 1650 idiag = a->idiag; 1651 mdiag = a->mdiag; 1652 1653 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1654 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1655 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1656 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1657 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1658 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1659 1660 if (flag & SOR_ZERO_INITIAL_GUESS) { 1661 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1662 for (i=0; i<m; i++) { 1663 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1664 sum = b[i]; 1665 n = (diag[i]-shift)/8; 1666 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1667 t[i] = sum; 1668 x[i] = sum*idiag[i]; 1669 } 1670 xb = t; 1671 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1672 } else xb = b; 1673 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1674 for (i=m-1; i>=0; i--) { 1675 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1676 sum = xb[i]; 1677 n = a->rlen[i]-(diag[i]-shift)/8-1; 1678 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1679 if (xb == b) { 1680 x[i] = sum*idiag[i]; 1681 } else { 1682 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1683 } 1684 } 1685 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1686 } 1687 its--; 1688 } 1689 while (its--) { 1690 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1691 for (i=0; i<m; i++) { 1692 /* lower */ 1693 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1694 sum = b[i]; 1695 n = (diag[i]-shift)/8; 1696 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1697 t[i] = sum; /* save application of the lower-triangular part */ 1698 /* upper */ 1699 n = a->rlen[i]-(diag[i]-shift)/8-1; 1700 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1701 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1702 } 1703 xb = t; 1704 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1705 } else xb = b; 1706 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1707 for (i=m-1; i>=0; i--) { 1708 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1709 sum = xb[i]; 1710 if (xb == b) { 1711 /* whole matrix (no checkpointing available) */ 1712 n = a->rlen[i]; 1713 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1714 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1715 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1716 n = a->rlen[i]-(diag[i]-shift)/8-1; 1717 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1718 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1719 } 1720 } 1721 if (xb == b) { 1722 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1723 } else { 1724 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1725 } 1726 } 1727 } 1728 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1729 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 1733 /* -------------------------------------------------------------------*/ 1734 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1735 MatGetRow_SeqSELL, 1736 MatRestoreRow_SeqSELL, 1737 MatMult_SeqSELL, 1738 /* 4*/ MatMultAdd_SeqSELL, 1739 MatMultTranspose_SeqSELL, 1740 MatMultTransposeAdd_SeqSELL, 1741 0, 1742 0, 1743 0, 1744 /* 10*/ 0, 1745 0, 1746 0, 1747 MatSOR_SeqSELL, 1748 0, 1749 /* 15*/ MatGetInfo_SeqSELL, 1750 MatEqual_SeqSELL, 1751 MatGetDiagonal_SeqSELL, 1752 MatDiagonalScale_SeqSELL, 1753 0, 1754 /* 20*/ 0, 1755 MatAssemblyEnd_SeqSELL, 1756 MatSetOption_SeqSELL, 1757 MatZeroEntries_SeqSELL, 1758 /* 24*/ 0, 1759 0, 1760 0, 1761 0, 1762 0, 1763 /* 29*/ MatSetUp_SeqSELL, 1764 0, 1765 0, 1766 0, 1767 0, 1768 /* 34*/ MatDuplicate_SeqSELL, 1769 0, 1770 0, 1771 0, 1772 0, 1773 /* 39*/ 0, 1774 0, 1775 0, 1776 MatGetValues_SeqSELL, 1777 MatCopy_SeqSELL, 1778 /* 44*/ 0, 1779 MatScale_SeqSELL, 1780 MatShift_SeqSELL, 1781 0, 1782 0, 1783 /* 49*/ 0, 1784 0, 1785 0, 1786 0, 1787 0, 1788 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1789 0, 1790 0, 1791 0, 1792 0, 1793 /* 59*/ 0, 1794 MatDestroy_SeqSELL, 1795 MatView_SeqSELL, 1796 0, 1797 0, 1798 /* 64*/ 0, 1799 0, 1800 0, 1801 0, 1802 0, 1803 /* 69*/ 0, 1804 0, 1805 0, 1806 0, 1807 0, 1808 /* 74*/ 0, 1809 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1810 0, 1811 0, 1812 0, 1813 /* 79*/ 0, 1814 0, 1815 0, 1816 0, 1817 0, 1818 /* 84*/ 0, 1819 0, 1820 0, 1821 0, 1822 0, 1823 /* 89*/ 0, 1824 0, 1825 0, 1826 0, 1827 0, 1828 /* 94*/ 0, 1829 0, 1830 0, 1831 0, 1832 0, 1833 /* 99*/ 0, 1834 0, 1835 0, 1836 MatConjugate_SeqSELL, 1837 0, 1838 /*104*/ 0, 1839 0, 1840 0, 1841 0, 1842 0, 1843 /*109*/ 0, 1844 0, 1845 0, 1846 0, 1847 MatMissingDiagonal_SeqSELL, 1848 /*114*/ 0, 1849 0, 1850 0, 1851 0, 1852 0, 1853 /*119*/ 0, 1854 0, 1855 0, 1856 0, 1857 0, 1858 /*124*/ 0, 1859 0, 1860 0, 1861 0, 1862 0, 1863 /*129*/ 0, 1864 0, 1865 0, 1866 0, 1867 0, 1868 /*134*/ 0, 1869 0, 1870 0, 1871 0, 1872 0, 1873 /*139*/ 0, 1874 0, 1875 0, 1876 MatFDColoringSetUp_SeqXAIJ, 1877 0, 1878 /*144*/0 1879 }; 1880 1881 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1882 { 1883 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1884 PetscErrorCode ierr; 1885 1886 PetscFunctionBegin; 1887 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1888 1889 /* allocate space for values if not already there */ 1890 if (!a->saved_values) { 1891 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1892 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1893 } 1894 1895 /* copy values over */ 1896 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1897 PetscFunctionReturn(0); 1898 } 1899 1900 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1901 { 1902 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1903 PetscErrorCode ierr; 1904 1905 PetscFunctionBegin; 1906 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1907 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1908 /* copy values over */ 1909 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 /*@C 1914 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1915 1916 Not Collective 1917 1918 Input Parameters: 1919 . mat - a MATSEQSELL matrix 1920 . array - pointer to the data 1921 1922 Level: intermediate 1923 1924 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1925 @*/ 1926 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1927 { 1928 PetscErrorCode ierr; 1929 1930 PetscFunctionBegin; 1931 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1936 { 1937 Mat_SeqSELL *b; 1938 PetscMPIInt size; 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1943 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1944 1945 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1946 1947 B->data = (void*)b; 1948 1949 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1950 1951 b->row = 0; 1952 b->col = 0; 1953 b->icol = 0; 1954 b->reallocs = 0; 1955 b->ignorezeroentries = PETSC_FALSE; 1956 b->roworiented = PETSC_TRUE; 1957 b->nonew = 0; 1958 b->diag = 0; 1959 b->solve_work = 0; 1960 B->spptr = 0; 1961 b->saved_values = 0; 1962 b->idiag = 0; 1963 b->mdiag = 0; 1964 b->ssor_work = 0; 1965 b->omega = 1.0; 1966 b->fshift = 0.0; 1967 b->idiagvalid = PETSC_FALSE; 1968 b->keepnonzeropattern = PETSC_FALSE; 1969 1970 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1971 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1972 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1973 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1974 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1975 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1976 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1977 PetscFunctionReturn(0); 1978 } 1979 1980 /* 1981 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1982 */ 1983 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1984 { 1985 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1986 PetscInt i,m=A->rmap->n; 1987 PetscInt totalslices=a->totalslices; 1988 PetscErrorCode ierr; 1989 1990 PetscFunctionBegin; 1991 c = (Mat_SeqSELL*)C->data; 1992 1993 C->factortype = A->factortype; 1994 c->row = 0; 1995 c->col = 0; 1996 c->icol = 0; 1997 c->reallocs = 0; 1998 1999 C->assembled = PETSC_TRUE; 2000 2001 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 2002 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 2003 2004 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 2005 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2006 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 2007 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 2008 2009 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 2010 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 2011 2012 /* allocate the matrix space */ 2013 if (mallocmatspace) { 2014 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 2015 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2016 2017 c->singlemalloc = PETSC_TRUE; 2018 2019 if (m > 0) { 2020 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 2021 if (cpvalues == MAT_COPY_VALUES) { 2022 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2023 } else { 2024 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2025 } 2026 } 2027 } 2028 2029 c->ignorezeroentries = a->ignorezeroentries; 2030 c->roworiented = a->roworiented; 2031 c->nonew = a->nonew; 2032 if (a->diag) { 2033 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 2034 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2035 for (i=0; i<m; i++) { 2036 c->diag[i] = a->diag[i]; 2037 } 2038 } else c->diag = 0; 2039 2040 c->solve_work = 0; 2041 c->saved_values = 0; 2042 c->idiag = 0; 2043 c->ssor_work = 0; 2044 c->keepnonzeropattern = a->keepnonzeropattern; 2045 c->free_val = PETSC_TRUE; 2046 c->free_colidx = PETSC_TRUE; 2047 2048 c->maxallocmat = a->maxallocmat; 2049 c->maxallocrow = a->maxallocrow; 2050 c->rlenmax = a->rlenmax; 2051 c->nz = a->nz; 2052 C->preallocated = PETSC_TRUE; 2053 2054 c->nonzerorowcnt = a->nonzerorowcnt; 2055 C->nonzerostate = A->nonzerostate; 2056 2057 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2062 { 2063 PetscErrorCode ierr; 2064 2065 PetscFunctionBegin; 2066 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2067 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2068 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2069 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2070 } 2071 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2072 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2073 PetscFunctionReturn(0); 2074 } 2075 2076 /*@C 2077 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2078 2079 Collective on MPI_Comm 2080 2081 Input Parameters: 2082 + comm - MPI communicator, set to PETSC_COMM_SELF 2083 . m - number of rows 2084 . n - number of columns 2085 . rlenmax - maximum number of nonzeros in a row 2086 - rlen - array containing the number of nonzeros in the various rows 2087 (possibly different for each row) or NULL 2088 2089 Output Parameter: 2090 . A - the matrix 2091 2092 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2093 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2094 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2095 2096 Notes: 2097 If nnz is given then nz is ignored 2098 2099 Specify the preallocated storage with either rlenmax or rlen (not both). 2100 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2101 allocation. For large problems you MUST preallocate memory or you 2102 will get TERRIBLE performance, see the users' manual chapter on matrices. 2103 2104 Level: intermediate 2105 2106 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 2107 2108 @*/ 2109 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2110 { 2111 PetscErrorCode ierr; 2112 2113 PetscFunctionBegin; 2114 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2115 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2116 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2117 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2118 PetscFunctionReturn(0); 2119 } 2120 2121 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2122 { 2123 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2124 PetscInt totalslices=a->totalslices; 2125 PetscErrorCode ierr; 2126 2127 PetscFunctionBegin; 2128 /* If the matrix dimensions are not equal,or no of nonzeros */ 2129 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2130 *flg = PETSC_FALSE; 2131 PetscFunctionReturn(0); 2132 } 2133 /* if the a->colidx are the same */ 2134 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 2135 if (!*flg) PetscFunctionReturn(0); 2136 /* if a->val are the same */ 2137 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2142 { 2143 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2144 2145 PetscFunctionBegin; 2146 a->idiagvalid = PETSC_FALSE; 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2151 { 2152 #if defined(PETSC_USE_COMPLEX) 2153 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2154 PetscInt i; 2155 PetscScalar *val = a->val; 2156 2157 PetscFunctionBegin; 2158 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2159 val[i] = PetscConj(val[i]); 2160 } 2161 #else 2162 PetscFunctionBegin; 2163 #endif 2164 PetscFunctionReturn(0); 2165 } 2166