1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 16 { 17 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 18 PetscErrorCode ierr; 19 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 20 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 21 PetscReal shift = 0.0; 22 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 23 24 PetscFunctionBegin; 25 allowzeropivot = PetscNot(A->erroriffailure); 26 27 if (a->idiagvalid) { 28 if (values) *values = a->idiag; 29 PetscFunctionReturn(0); 30 } 31 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 32 diag_offset = a->diag; 33 if (!a->idiag) { 34 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 35 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 36 } 37 diag = a->idiag; 38 mdiag = a->idiag+bs2*mbs; 39 if (values) *values = a->idiag; 40 /* factor and invert each block */ 41 switch (bs) { 42 case 1: 43 for (i=0; i<mbs; i++) { 44 odiag = v + 1*diag_offset[i]; 45 diag[0] = odiag[0]; 46 mdiag[0] = odiag[0]; 47 48 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 49 if (allowzeropivot) { 50 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 51 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 52 A->factorerror_zeropivot_row = i; 53 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 54 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 55 } 56 57 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 58 diag += 1; 59 mdiag += 1; 60 } 61 break; 62 case 2: 63 for (i=0; i<mbs; i++) { 64 odiag = v + 4*diag_offset[i]; 65 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 66 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 67 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 68 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 69 diag += 4; 70 mdiag += 4; 71 } 72 break; 73 case 3: 74 for (i=0; i<mbs; i++) { 75 odiag = v + 9*diag_offset[i]; 76 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 77 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 78 diag[8] = odiag[8]; 79 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 80 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 81 mdiag[8] = odiag[8]; 82 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 83 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 94 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 105 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 106 diag += 25; 107 mdiag += 25; 108 } 109 break; 110 case 6: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 36*diag_offset[i]; 113 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 115 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 116 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 117 diag += 36; 118 mdiag += 36; 119 } 120 break; 121 case 7: 122 for (i=0; i<mbs; i++) { 123 odiag = v + 49*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += 49; 129 mdiag += 49; 130 } 131 break; 132 default: 133 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 134 for (i=0; i<mbs; i++) { 135 odiag = v + bs2*diag_offset[i]; 136 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 138 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 139 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 140 diag += bs2; 141 mdiag += bs2; 142 } 143 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 144 } 145 a->idiagvalid = PETSC_TRUE; 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 150 { 151 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 152 PetscScalar *x,*work,*w,*workt,*t; 153 const MatScalar *v,*aa = a->a, *idiag; 154 const PetscScalar *b,*xb; 155 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 162 its = its*lits; 163 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 164 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 165 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 166 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 167 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 168 169 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 170 171 if (!m) PetscFunctionReturn(0); 172 diag = a->diag; 173 idiag = a->idiag; 174 k = PetscMax(A->rmap->n,A->cmap->n); 175 if (!a->mult_work) { 176 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 177 } 178 if (!a->sor_workt) { 179 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 180 } 181 if (!a->sor_work) { 182 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 183 } 184 work = a->mult_work; 185 t = a->sor_workt; 186 w = a->sor_work; 187 188 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 189 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 190 191 if (flag & SOR_ZERO_INITIAL_GUESS) { 192 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 193 switch (bs) { 194 case 1: 195 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 196 t[0] = b[0]; 197 i2 = 1; 198 idiag += 1; 199 for (i=1; i<m; i++) { 200 v = aa + ai[i]; 201 vi = aj + ai[i]; 202 nz = diag[i] - ai[i]; 203 s[0] = b[i2]; 204 for (j=0; j<nz; j++) { 205 xw[0] = x[vi[j]]; 206 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 207 } 208 t[i2] = s[0]; 209 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 210 x[i2] = xw[0]; 211 idiag += 1; 212 i2 += 1; 213 } 214 break; 215 case 2: 216 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 217 t[0] = b[0]; t[1] = b[1]; 218 i2 = 2; 219 idiag += 4; 220 for (i=1; i<m; i++) { 221 v = aa + 4*ai[i]; 222 vi = aj + ai[i]; 223 nz = diag[i] - ai[i]; 224 s[0] = b[i2]; s[1] = b[i2+1]; 225 for (j=0; j<nz; j++) { 226 idx = 2*vi[j]; 227 it = 4*j; 228 xw[0] = x[idx]; xw[1] = x[1+idx]; 229 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 230 } 231 t[i2] = s[0]; t[i2+1] = s[1]; 232 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 233 x[i2] = xw[0]; x[i2+1] = xw[1]; 234 idiag += 4; 235 i2 += 2; 236 } 237 break; 238 case 3: 239 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 240 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 241 i2 = 3; 242 idiag += 9; 243 for (i=1; i<m; i++) { 244 v = aa + 9*ai[i]; 245 vi = aj + ai[i]; 246 nz = diag[i] - ai[i]; 247 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 248 while (nz--) { 249 idx = 3*(*vi++); 250 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 251 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 252 v += 9; 253 } 254 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 255 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 256 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 257 idiag += 9; 258 i2 += 3; 259 } 260 break; 261 case 4: 262 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 263 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 264 i2 = 4; 265 idiag += 16; 266 for (i=1; i<m; i++) { 267 v = aa + 16*ai[i]; 268 vi = aj + ai[i]; 269 nz = diag[i] - ai[i]; 270 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 271 while (nz--) { 272 idx = 4*(*vi++); 273 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 274 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 275 v += 16; 276 } 277 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 278 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 279 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 280 idiag += 16; 281 i2 += 4; 282 } 283 break; 284 case 5: 285 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 286 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 287 i2 = 5; 288 idiag += 25; 289 for (i=1; i<m; i++) { 290 v = aa + 25*ai[i]; 291 vi = aj + ai[i]; 292 nz = diag[i] - ai[i]; 293 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 294 while (nz--) { 295 idx = 5*(*vi++); 296 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 298 v += 25; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 301 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 302 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 303 idiag += 25; 304 i2 += 5; 305 } 306 break; 307 case 6: 308 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 309 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 310 i2 = 6; 311 idiag += 36; 312 for (i=1; i<m; i++) { 313 v = aa + 36*ai[i]; 314 vi = aj + ai[i]; 315 nz = diag[i] - ai[i]; 316 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 317 while (nz--) { 318 idx = 6*(*vi++); 319 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 320 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 321 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 322 v += 36; 323 } 324 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 325 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 326 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 327 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 328 idiag += 36; 329 i2 += 6; 330 } 331 break; 332 case 7: 333 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 334 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 335 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 336 i2 = 7; 337 idiag += 49; 338 for (i=1; i<m; i++) { 339 v = aa + 49*ai[i]; 340 vi = aj + ai[i]; 341 nz = diag[i] - ai[i]; 342 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 343 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 344 while (nz--) { 345 idx = 7*(*vi++); 346 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 347 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 348 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 349 v += 49; 350 } 351 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 352 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 353 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 354 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 355 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 356 idiag += 49; 357 i2 += 7; 358 } 359 break; 360 default: 361 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 362 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 363 i2 = bs; 364 idiag += bs2; 365 for (i=1; i<m; i++) { 366 v = aa + bs2*ai[i]; 367 vi = aj + ai[i]; 368 nz = diag[i] - ai[i]; 369 370 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 371 /* copy all rows of x that are needed into contiguous space */ 372 workt = work; 373 for (j=0; j<nz; j++) { 374 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 375 workt += bs; 376 } 377 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 378 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 379 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 380 381 idiag += bs2; 382 i2 += bs; 383 } 384 break; 385 } 386 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 387 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 388 xb = t; 389 } 390 else xb = b; 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 392 idiag = a->idiag+bs2*(a->mbs-1); 393 i2 = bs * (m-1); 394 switch (bs) { 395 case 1: 396 s[0] = xb[i2]; 397 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 398 x[i2] = xw[0]; 399 i2 -= 1; 400 for (i=m-2; i>=0; i--) { 401 v = aa + (diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s[0] = xb[i2]; 405 for (j=0; j<nz; j++) { 406 xw[0] = x[vi[j]]; 407 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 408 } 409 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 410 x[i2] = xw[0]; 411 idiag -= 1; 412 i2 -= 1; 413 } 414 break; 415 case 2: 416 s[0] = xb[i2]; s[1] = xb[i2+1]; 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 i2 -= 2; 420 idiag -= 4; 421 for (i=m-2; i>=0; i--) { 422 v = aa + 4*(diag[i] + 1); 423 vi = aj + diag[i] + 1; 424 nz = ai[i+1] - diag[i] - 1; 425 s[0] = xb[i2]; s[1] = xb[i2+1]; 426 for (j=0; j<nz; j++) { 427 idx = 2*vi[j]; 428 it = 4*j; 429 xw[0] = x[idx]; xw[1] = x[1+idx]; 430 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 431 } 432 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 433 x[i2] = xw[0]; x[i2+1] = xw[1]; 434 idiag -= 4; 435 i2 -= 2; 436 } 437 break; 438 case 3: 439 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 i2 -= 3; 443 idiag -= 9; 444 for (i=m-2; i>=0; i--) { 445 v = aa + 9*(diag[i]+1); 446 vi = aj + diag[i] + 1; 447 nz = ai[i+1] - diag[i] - 1; 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 449 while (nz--) { 450 idx = 3*(*vi++); 451 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 452 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 453 v += 9; 454 } 455 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 456 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 457 idiag -= 9; 458 i2 -= 3; 459 } 460 break; 461 case 4: 462 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 i2 -= 4; 466 idiag -= 16; 467 for (i=m-2; i>=0; i--) { 468 v = aa + 16*(diag[i]+1); 469 vi = aj + diag[i] + 1; 470 nz = ai[i+1] - diag[i] - 1; 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 472 while (nz--) { 473 idx = 4*(*vi++); 474 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 475 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 476 v += 16; 477 } 478 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 479 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 480 idiag -= 16; 481 i2 -= 4; 482 } 483 break; 484 case 5: 485 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 i2 -= 5; 489 idiag -= 25; 490 for (i=m-2; i>=0; i--) { 491 v = aa + 25*(diag[i]+1); 492 vi = aj + diag[i] + 1; 493 nz = ai[i+1] - diag[i] - 1; 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 495 while (nz--) { 496 idx = 5*(*vi++); 497 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 499 v += 25; 500 } 501 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 503 idiag -= 25; 504 i2 -= 5; 505 } 506 break; 507 case 6: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 509 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 510 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 511 i2 -= 6; 512 idiag -= 36; 513 for (i=m-2; i>=0; i--) { 514 v = aa + 36*(diag[i]+1); 515 vi = aj + diag[i] + 1; 516 nz = ai[i+1] - diag[i] - 1; 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 518 while (nz--) { 519 idx = 6*(*vi++); 520 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 521 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 522 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 523 v += 36; 524 } 525 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 526 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 527 idiag -= 36; 528 i2 -= 6; 529 } 530 break; 531 case 7: 532 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 533 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 534 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 535 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 536 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 537 i2 -= 7; 538 idiag -= 49; 539 for (i=m-2; i>=0; i--) { 540 v = aa + 49*(diag[i]+1); 541 vi = aj + diag[i] + 1; 542 nz = ai[i+1] - diag[i] - 1; 543 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 544 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 545 while (nz--) { 546 idx = 7*(*vi++); 547 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 548 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 549 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 550 v += 49; 551 } 552 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 553 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 554 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 555 idiag -= 49; 556 i2 -= 7; 557 } 558 break; 559 default: 560 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 561 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 562 i2 -= bs; 563 idiag -= bs2; 564 for (i=m-2; i>=0; i--) { 565 v = aa + bs2*(diag[i]+1); 566 vi = aj + diag[i] + 1; 567 nz = ai[i+1] - diag[i] - 1; 568 569 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 570 /* copy all rows of x that are needed into contiguous space */ 571 workt = work; 572 for (j=0; j<nz; j++) { 573 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 574 workt += bs; 575 } 576 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 577 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 578 579 idiag -= bs2; 580 i2 -= bs; 581 } 582 break; 583 } 584 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 585 } 586 its--; 587 } 588 while (its--) { 589 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 590 idiag = a->idiag; 591 i2 = 0; 592 switch (bs) { 593 case 1: 594 for (i=0; i<m; i++) { 595 v = aa + ai[i]; 596 vi = aj + ai[i]; 597 nz = ai[i+1] - ai[i]; 598 s[0] = b[i2]; 599 for (j=0; j<nz; j++) { 600 xw[0] = x[vi[j]]; 601 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 602 } 603 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 604 x[i2] += xw[0]; 605 idiag += 1; 606 i2 += 1; 607 } 608 break; 609 case 2: 610 for (i=0; i<m; i++) { 611 v = aa + 4*ai[i]; 612 vi = aj + ai[i]; 613 nz = ai[i+1] - ai[i]; 614 s[0] = b[i2]; s[1] = b[i2+1]; 615 for (j=0; j<nz; j++) { 616 idx = 2*vi[j]; 617 it = 4*j; 618 xw[0] = x[idx]; xw[1] = x[1+idx]; 619 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 620 } 621 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 622 x[i2] += xw[0]; x[i2+1] += xw[1]; 623 idiag += 4; 624 i2 += 2; 625 } 626 break; 627 case 3: 628 for (i=0; i<m; i++) { 629 v = aa + 9*ai[i]; 630 vi = aj + ai[i]; 631 nz = ai[i+1] - ai[i]; 632 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 633 while (nz--) { 634 idx = 3*(*vi++); 635 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 636 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 637 v += 9; 638 } 639 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 640 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 641 idiag += 9; 642 i2 += 3; 643 } 644 break; 645 case 4: 646 for (i=0; i<m; i++) { 647 v = aa + 16*ai[i]; 648 vi = aj + ai[i]; 649 nz = ai[i+1] - ai[i]; 650 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 651 while (nz--) { 652 idx = 4*(*vi++); 653 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 654 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 655 v += 16; 656 } 657 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 658 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 659 idiag += 16; 660 i2 += 4; 661 } 662 break; 663 case 5: 664 for (i=0; i<m; i++) { 665 v = aa + 25*ai[i]; 666 vi = aj + ai[i]; 667 nz = ai[i+1] - ai[i]; 668 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 672 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 673 v += 25; 674 } 675 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 676 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 677 idiag += 25; 678 i2 += 5; 679 } 680 break; 681 case 6: 682 for (i=0; i<m; i++) { 683 v = aa + 36*ai[i]; 684 vi = aj + ai[i]; 685 nz = ai[i+1] - ai[i]; 686 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 687 while (nz--) { 688 idx = 6*(*vi++); 689 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 690 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 691 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 692 v += 36; 693 } 694 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 695 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 696 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 697 idiag += 36; 698 i2 += 6; 699 } 700 break; 701 case 7: 702 for (i=0; i<m; i++) { 703 v = aa + 49*ai[i]; 704 vi = aj + ai[i]; 705 nz = ai[i+1] - ai[i]; 706 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 707 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 708 while (nz--) { 709 idx = 7*(*vi++); 710 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 711 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 712 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 713 v += 49; 714 } 715 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 716 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 717 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 718 idiag += 49; 719 i2 += 7; 720 } 721 break; 722 default: 723 for (i=0; i<m; i++) { 724 v = aa + bs2*ai[i]; 725 vi = aj + ai[i]; 726 nz = ai[i+1] - ai[i]; 727 728 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 729 /* copy all rows of x that are needed into contiguous space */ 730 workt = work; 731 for (j=0; j<nz; j++) { 732 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 733 workt += bs; 734 } 735 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 736 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 737 738 idiag += bs2; 739 i2 += bs; 740 } 741 break; 742 } 743 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 744 } 745 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 746 idiag = a->idiag+bs2*(a->mbs-1); 747 i2 = bs * (m-1); 748 switch (bs) { 749 case 1: 750 for (i=m-1; i>=0; i--) { 751 v = aa + ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; 755 for (j=0; j<nz; j++) { 756 xw[0] = x[vi[j]]; 757 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 758 } 759 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 760 x[i2] += xw[0]; 761 idiag -= 1; 762 i2 -= 1; 763 } 764 break; 765 case 2: 766 for (i=m-1; i>=0; i--) { 767 v = aa + 4*ai[i]; 768 vi = aj + ai[i]; 769 nz = ai[i+1] - ai[i]; 770 s[0] = b[i2]; s[1] = b[i2+1]; 771 for (j=0; j<nz; j++) { 772 idx = 2*vi[j]; 773 it = 4*j; 774 xw[0] = x[idx]; xw[1] = x[1+idx]; 775 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 776 } 777 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 778 x[i2] += xw[0]; x[i2+1] += xw[1]; 779 idiag -= 4; 780 i2 -= 2; 781 } 782 break; 783 case 3: 784 for (i=m-1; i>=0; i--) { 785 v = aa + 9*ai[i]; 786 vi = aj + ai[i]; 787 nz = ai[i+1] - ai[i]; 788 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 789 while (nz--) { 790 idx = 3*(*vi++); 791 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 792 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 793 v += 9; 794 } 795 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 796 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 797 idiag -= 9; 798 i2 -= 3; 799 } 800 break; 801 case 4: 802 for (i=m-1; i>=0; i--) { 803 v = aa + 16*ai[i]; 804 vi = aj + ai[i]; 805 nz = ai[i+1] - ai[i]; 806 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 807 while (nz--) { 808 idx = 4*(*vi++); 809 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 810 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 811 v += 16; 812 } 813 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 814 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 815 idiag -= 16; 816 i2 -= 4; 817 } 818 break; 819 case 5: 820 for (i=m-1; i>=0; i--) { 821 v = aa + 25*ai[i]; 822 vi = aj + ai[i]; 823 nz = ai[i+1] - ai[i]; 824 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 825 while (nz--) { 826 idx = 5*(*vi++); 827 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 828 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 829 v += 25; 830 } 831 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 832 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 833 idiag -= 25; 834 i2 -= 5; 835 } 836 break; 837 case 6: 838 for (i=m-1; i>=0; i--) { 839 v = aa + 36*ai[i]; 840 vi = aj + ai[i]; 841 nz = ai[i+1] - ai[i]; 842 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 843 while (nz--) { 844 idx = 6*(*vi++); 845 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 846 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 847 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 848 v += 36; 849 } 850 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 851 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 852 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 853 idiag -= 36; 854 i2 -= 6; 855 } 856 break; 857 case 7: 858 for (i=m-1; i>=0; i--) { 859 v = aa + 49*ai[i]; 860 vi = aj + ai[i]; 861 nz = ai[i+1] - ai[i]; 862 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 863 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 864 while (nz--) { 865 idx = 7*(*vi++); 866 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 867 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 868 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 869 v += 49; 870 } 871 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 872 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 873 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 874 idiag -= 49; 875 i2 -= 7; 876 } 877 break; 878 default: 879 for (i=m-1; i>=0; i--) { 880 v = aa + bs2*ai[i]; 881 vi = aj + ai[i]; 882 nz = ai[i+1] - ai[i]; 883 884 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 885 /* copy all rows of x that are needed into contiguous space */ 886 workt = work; 887 for (j=0; j<nz; j++) { 888 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 889 workt += bs; 890 } 891 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 892 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 893 894 idiag -= bs2; 895 i2 -= bs; 896 } 897 break; 898 } 899 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 900 } 901 } 902 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 903 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 908 /* 909 Special version for direct calls from Fortran (Used in PETSc-fun3d) 910 */ 911 #if defined(PETSC_HAVE_FORTRAN_CAPS) 912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 914 #define matsetvaluesblocked4_ matsetvaluesblocked4 915 #endif 916 917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 918 { 919 Mat A = *AA; 920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 921 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 922 PetscInt *ai =a->i,*ailen=a->ilen; 923 PetscInt *aj =a->j,stepval,lastcol = -1; 924 const PetscScalar *value = v; 925 MatScalar *ap,*aa = a->a,*bap; 926 927 PetscFunctionBegin; 928 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 929 stepval = (n-1)*4; 930 for (k=0; k<m; k++) { /* loop over added rows */ 931 row = im[k]; 932 rp = aj + ai[row]; 933 ap = aa + 16*ai[row]; 934 nrow = ailen[row]; 935 low = 0; 936 high = nrow; 937 for (l=0; l<n; l++) { /* loop over added columns */ 938 col = in[l]; 939 if (col <= lastcol) low = 0; 940 else high = nrow; 941 lastcol = col; 942 value = v + k*(stepval+4 + l)*4; 943 while (high-low > 7) { 944 t = (low+high)/2; 945 if (rp[t] > col) high = t; 946 else low = t; 947 } 948 for (i=low; i<high; i++) { 949 if (rp[i] > col) break; 950 if (rp[i] == col) { 951 bap = ap + 16*i; 952 for (ii=0; ii<4; ii++,value+=stepval) { 953 for (jj=ii; jj<16; jj+=4) { 954 bap[jj] += *value++; 955 } 956 } 957 goto noinsert2; 958 } 959 } 960 N = nrow++ - 1; 961 high++; /* added new column index thus must search to one higher than before */ 962 /* shift up all the later entries in this row */ 963 for (ii=N; ii>=i; ii--) { 964 rp[ii+1] = rp[ii]; 965 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 966 } 967 if (N >= i) { 968 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 969 } 970 rp[i] = col; 971 bap = ap + 16*i; 972 for (ii=0; ii<4; ii++,value+=stepval) { 973 for (jj=ii; jj<16; jj+=4) { 974 bap[jj] = *value++; 975 } 976 } 977 noinsert2:; 978 low = i; 979 } 980 ailen[row] = nrow; 981 } 982 PetscFunctionReturnVoid(); 983 } 984 985 #if defined(PETSC_HAVE_FORTRAN_CAPS) 986 #define matsetvalues4_ MATSETVALUES4 987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 988 #define matsetvalues4_ matsetvalues4 989 #endif 990 991 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 992 { 993 Mat A = *AA; 994 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 995 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 996 PetscInt *ai=a->i,*ailen=a->ilen; 997 PetscInt *aj=a->j,brow,bcol; 998 PetscInt ridx,cidx,lastcol = -1; 999 MatScalar *ap,value,*aa=a->a,*bap; 1000 1001 PetscFunctionBegin; 1002 for (k=0; k<m; k++) { /* loop over added rows */ 1003 row = im[k]; brow = row/4; 1004 rp = aj + ai[brow]; 1005 ap = aa + 16*ai[brow]; 1006 nrow = ailen[brow]; 1007 low = 0; 1008 high = nrow; 1009 for (l=0; l<n; l++) { /* loop over added columns */ 1010 col = in[l]; bcol = col/4; 1011 ridx = row % 4; cidx = col % 4; 1012 value = v[l + k*n]; 1013 if (col <= lastcol) low = 0; 1014 else high = nrow; 1015 lastcol = col; 1016 while (high-low > 7) { 1017 t = (low+high)/2; 1018 if (rp[t] > bcol) high = t; 1019 else low = t; 1020 } 1021 for (i=low; i<high; i++) { 1022 if (rp[i] > bcol) break; 1023 if (rp[i] == bcol) { 1024 bap = ap + 16*i + 4*cidx + ridx; 1025 *bap += value; 1026 goto noinsert1; 1027 } 1028 } 1029 N = nrow++ - 1; 1030 high++; /* added new column thus must search to one higher than before */ 1031 /* shift up all the later entries in this row */ 1032 for (ii=N; ii>=i; ii--) { 1033 rp[ii+1] = rp[ii]; 1034 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1035 } 1036 if (N>=i) { 1037 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1038 } 1039 rp[i] = bcol; 1040 ap[16*i + 4*cidx + ridx] = value; 1041 noinsert1:; 1042 low = i; 1043 } 1044 ailen[brow] = nrow; 1045 } 1046 PetscFunctionReturnVoid(); 1047 } 1048 1049 /* 1050 Checks for missing diagonals 1051 */ 1052 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1053 { 1054 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 PetscInt *diag,*ii = a->i,i; 1057 1058 PetscFunctionBegin; 1059 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1060 *missing = PETSC_FALSE; 1061 if (A->rmap->n > 0 && !ii) { 1062 *missing = PETSC_TRUE; 1063 if (d) *d = 0; 1064 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1065 } else { 1066 diag = a->diag; 1067 for (i=0; i<a->mbs; i++) { 1068 if (diag[i] >= ii[i+1]) { 1069 *missing = PETSC_TRUE; 1070 if (d) *d = i; 1071 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1072 break; 1073 } 1074 } 1075 } 1076 PetscFunctionReturn(0); 1077 } 1078 1079 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1080 { 1081 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1082 PetscErrorCode ierr; 1083 PetscInt i,j,m = a->mbs; 1084 1085 PetscFunctionBegin; 1086 if (!a->diag) { 1087 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1088 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1089 a->free_diag = PETSC_TRUE; 1090 } 1091 for (i=0; i<m; i++) { 1092 a->diag[i] = a->i[i+1]; 1093 for (j=a->i[i]; j<a->i[i+1]; j++) { 1094 if (a->j[j] == i) { 1095 a->diag[i] = j; 1096 break; 1097 } 1098 } 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 1104 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1105 { 1106 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1109 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1110 1111 PetscFunctionBegin; 1112 *nn = n; 1113 if (!ia) PetscFunctionReturn(0); 1114 if (symmetric) { 1115 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1116 nz = tia[n]; 1117 } else { 1118 tia = a->i; tja = a->j; 1119 } 1120 1121 if (!blockcompressed && bs > 1) { 1122 (*nn) *= bs; 1123 /* malloc & create the natural set of indices */ 1124 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1125 if (n) { 1126 (*ia)[0] = oshift; 1127 for (j=1; j<bs; j++) { 1128 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1129 } 1130 } 1131 1132 for (i=1; i<n; i++) { 1133 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1134 for (j=1; j<bs; j++) { 1135 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1136 } 1137 } 1138 if (n) { 1139 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1140 } 1141 1142 if (inja) { 1143 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1144 cnt = 0; 1145 for (i=0; i<n; i++) { 1146 for (j=0; j<bs; j++) { 1147 for (k=tia[i]; k<tia[i+1]; k++) { 1148 for (l=0; l<bs; l++) { 1149 (*ja)[cnt++] = bs*tja[k] + l; 1150 } 1151 } 1152 } 1153 } 1154 } 1155 1156 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1157 ierr = PetscFree(tia);CHKERRQ(ierr); 1158 ierr = PetscFree(tja);CHKERRQ(ierr); 1159 } 1160 } else if (oshift == 1) { 1161 if (symmetric) { 1162 nz = tia[A->rmap->n/bs]; 1163 /* add 1 to i and j indices */ 1164 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1165 *ia = tia; 1166 if (ja) { 1167 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1168 *ja = tja; 1169 } 1170 } else { 1171 nz = a->i[A->rmap->n/bs]; 1172 /* malloc space and add 1 to i and j indices */ 1173 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1174 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1175 if (ja) { 1176 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1177 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1178 } 1179 } 1180 } else { 1181 *ia = tia; 1182 if (ja) *ja = tja; 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1188 { 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 if (!ia) PetscFunctionReturn(0); 1193 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1194 ierr = PetscFree(*ia);CHKERRQ(ierr); 1195 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1196 } 1197 PetscFunctionReturn(0); 1198 } 1199 1200 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1201 { 1202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 #if defined(PETSC_USE_LOG) 1207 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1208 #endif 1209 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1210 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1211 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1212 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1213 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1214 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1215 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1216 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1217 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1218 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1219 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1220 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1221 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1222 1223 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1224 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1225 ierr = PetscFree(A->data);CHKERRQ(ierr); 1226 1227 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1238 #if defined(PETSC_HAVE_HYPRE) 1239 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1240 #endif 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1245 { 1246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 switch (op) { 1251 case MAT_ROW_ORIENTED: 1252 a->roworiented = flg; 1253 break; 1254 case MAT_KEEP_NONZERO_PATTERN: 1255 a->keepnonzeropattern = flg; 1256 break; 1257 case MAT_NEW_NONZERO_LOCATIONS: 1258 a->nonew = (flg ? 0 : 1); 1259 break; 1260 case MAT_NEW_NONZERO_LOCATION_ERR: 1261 a->nonew = (flg ? -1 : 0); 1262 break; 1263 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1264 a->nonew = (flg ? -2 : 0); 1265 break; 1266 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1267 a->nounused = (flg ? -1 : 0); 1268 break; 1269 case MAT_NEW_DIAGONALS: 1270 case MAT_IGNORE_OFF_PROC_ENTRIES: 1271 case MAT_USE_HASH_TABLE: 1272 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1273 break; 1274 case MAT_SPD: 1275 case MAT_SYMMETRIC: 1276 case MAT_STRUCTURALLY_SYMMETRIC: 1277 case MAT_HERMITIAN: 1278 case MAT_SYMMETRY_ETERNAL: 1279 case MAT_SUBMAT_SINGLEIS: 1280 /* These options are handled directly by MatSetOption() */ 1281 break; 1282 default: 1283 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1284 } 1285 PetscFunctionReturn(0); 1286 } 1287 1288 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1289 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1290 { 1291 PetscErrorCode ierr; 1292 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1293 MatScalar *aa_i; 1294 PetscScalar *v_i; 1295 1296 PetscFunctionBegin; 1297 bs = A->rmap->bs; 1298 bs2 = bs*bs; 1299 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1300 1301 bn = row/bs; /* Block number */ 1302 bp = row % bs; /* Block Position */ 1303 M = ai[bn+1] - ai[bn]; 1304 *nz = bs*M; 1305 1306 if (v) { 1307 *v = 0; 1308 if (*nz) { 1309 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1310 for (i=0; i<M; i++) { /* for each block in the block row */ 1311 v_i = *v + i*bs; 1312 aa_i = aa + bs2*(ai[bn] + i); 1313 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1314 } 1315 } 1316 } 1317 1318 if (idx) { 1319 *idx = 0; 1320 if (*nz) { 1321 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1322 for (i=0; i<M; i++) { /* for each block in the block row */ 1323 idx_i = *idx + i*bs; 1324 itmp = bs*aj[ai[bn] + i]; 1325 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1326 } 1327 } 1328 } 1329 PetscFunctionReturn(0); 1330 } 1331 1332 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1333 { 1334 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1335 PetscErrorCode ierr; 1336 1337 PetscFunctionBegin; 1338 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1343 { 1344 PetscErrorCode ierr; 1345 1346 PetscFunctionBegin; 1347 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1348 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1349 PetscFunctionReturn(0); 1350 } 1351 1352 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1353 { 1354 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1355 Mat C; 1356 PetscErrorCode ierr; 1357 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1358 PetscInt *rows,*cols,bs2=a->bs2; 1359 MatScalar *array; 1360 1361 PetscFunctionBegin; 1362 if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1363 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1364 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1365 1366 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1367 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1368 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1369 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1370 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1371 ierr = PetscFree(col);CHKERRQ(ierr); 1372 } else { 1373 C = *B; 1374 } 1375 1376 array = a->a; 1377 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1378 for (i=0; i<mbs; i++) { 1379 cols[0] = i*bs; 1380 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1381 len = ai[i+1] - ai[i]; 1382 for (j=0; j<len; j++) { 1383 rows[0] = (*aj++)*bs; 1384 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1385 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1386 array += bs2; 1387 } 1388 } 1389 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1390 1391 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1392 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 1394 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1395 *B = C; 1396 } else { 1397 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1398 } 1399 PetscFunctionReturn(0); 1400 } 1401 1402 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1403 { 1404 PetscErrorCode ierr; 1405 Mat Btrans; 1406 1407 PetscFunctionBegin; 1408 *f = PETSC_FALSE; 1409 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1410 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1411 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1416 { 1417 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1418 PetscErrorCode ierr; 1419 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1420 int fd; 1421 PetscScalar *aa; 1422 FILE *file; 1423 1424 PetscFunctionBegin; 1425 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1426 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1427 col_lens[0] = MAT_FILE_CLASSID; 1428 1429 col_lens[1] = A->rmap->N; 1430 col_lens[2] = A->cmap->n; 1431 col_lens[3] = a->nz*bs2; 1432 1433 /* store lengths of each row and write (including header) to file */ 1434 count = 0; 1435 for (i=0; i<a->mbs; i++) { 1436 for (j=0; j<bs; j++) { 1437 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1438 } 1439 } 1440 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1441 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1442 1443 /* store column indices (zero start index) */ 1444 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1445 count = 0; 1446 for (i=0; i<a->mbs; i++) { 1447 for (j=0; j<bs; j++) { 1448 for (k=a->i[i]; k<a->i[i+1]; k++) { 1449 for (l=0; l<bs; l++) { 1450 jj[count++] = bs*a->j[k] + l; 1451 } 1452 } 1453 } 1454 } 1455 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1456 ierr = PetscFree(jj);CHKERRQ(ierr); 1457 1458 /* store nonzero values */ 1459 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1460 count = 0; 1461 for (i=0; i<a->mbs; i++) { 1462 for (j=0; j<bs; j++) { 1463 for (k=a->i[i]; k<a->i[i+1]; k++) { 1464 for (l=0; l<bs; l++) { 1465 aa[count++] = a->a[bs2*k + l*bs + j]; 1466 } 1467 } 1468 } 1469 } 1470 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1471 ierr = PetscFree(aa);CHKERRQ(ierr); 1472 1473 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1474 if (file) { 1475 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1481 { 1482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1483 PetscErrorCode ierr; 1484 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1485 PetscViewerFormat format; 1486 1487 PetscFunctionBegin; 1488 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1489 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1490 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1491 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1492 const char *matname; 1493 Mat aij; 1494 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1495 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1496 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1497 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1498 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1499 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1500 PetscFunctionReturn(0); 1501 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1502 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1503 for (i=0; i<a->mbs; i++) { 1504 for (j=0; j<bs; j++) { 1505 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1506 for (k=a->i[i]; k<a->i[i+1]; k++) { 1507 for (l=0; l<bs; l++) { 1508 #if defined(PETSC_USE_COMPLEX) 1509 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1510 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1511 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1512 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1513 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1514 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1515 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1516 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1517 } 1518 #else 1519 if (a->a[bs2*k + l*bs + j] != 0.0) { 1520 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1521 } 1522 #endif 1523 } 1524 } 1525 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1526 } 1527 } 1528 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1529 } else { 1530 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1531 for (i=0; i<a->mbs; i++) { 1532 for (j=0; j<bs; j++) { 1533 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1534 for (k=a->i[i]; k<a->i[i+1]; k++) { 1535 for (l=0; l<bs; l++) { 1536 #if defined(PETSC_USE_COMPLEX) 1537 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1538 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1539 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1540 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1542 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1543 } else { 1544 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1545 } 1546 #else 1547 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1548 #endif 1549 } 1550 } 1551 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1552 } 1553 } 1554 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1555 } 1556 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1557 PetscFunctionReturn(0); 1558 } 1559 1560 #include <petscdraw.h> 1561 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1562 { 1563 Mat A = (Mat) Aa; 1564 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1565 PetscErrorCode ierr; 1566 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1567 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1568 MatScalar *aa; 1569 PetscViewer viewer; 1570 PetscViewerFormat format; 1571 1572 PetscFunctionBegin; 1573 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1574 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1575 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1576 1577 /* loop over matrix elements drawing boxes */ 1578 1579 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1580 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1581 /* Blue for negative, Cyan for zero and Red for positive */ 1582 color = PETSC_DRAW_BLUE; 1583 for (i=0,row=0; i<mbs; i++,row+=bs) { 1584 for (j=a->i[i]; j<a->i[i+1]; j++) { 1585 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1586 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1587 aa = a->a + j*bs2; 1588 for (k=0; k<bs; k++) { 1589 for (l=0; l<bs; l++) { 1590 if (PetscRealPart(*aa++) >= 0.) continue; 1591 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1592 } 1593 } 1594 } 1595 } 1596 color = PETSC_DRAW_CYAN; 1597 for (i=0,row=0; i<mbs; i++,row+=bs) { 1598 for (j=a->i[i]; j<a->i[i+1]; j++) { 1599 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1600 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1601 aa = a->a + j*bs2; 1602 for (k=0; k<bs; k++) { 1603 for (l=0; l<bs; l++) { 1604 if (PetscRealPart(*aa++) != 0.) continue; 1605 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1606 } 1607 } 1608 } 1609 } 1610 color = PETSC_DRAW_RED; 1611 for (i=0,row=0; i<mbs; i++,row+=bs) { 1612 for (j=a->i[i]; j<a->i[i+1]; j++) { 1613 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1614 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1615 aa = a->a + j*bs2; 1616 for (k=0; k<bs; k++) { 1617 for (l=0; l<bs; l++) { 1618 if (PetscRealPart(*aa++) <= 0.) continue; 1619 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1620 } 1621 } 1622 } 1623 } 1624 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1625 } else { 1626 /* use contour shading to indicate magnitude of values */ 1627 /* first determine max of all nonzero values */ 1628 PetscReal minv = 0.0, maxv = 0.0; 1629 PetscDraw popup; 1630 1631 for (i=0; i<a->nz*a->bs2; i++) { 1632 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1633 } 1634 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1635 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1636 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1637 1638 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1639 for (i=0,row=0; i<mbs; i++,row+=bs) { 1640 for (j=a->i[i]; j<a->i[i+1]; j++) { 1641 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1642 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1643 aa = a->a + j*bs2; 1644 for (k=0; k<bs; k++) { 1645 for (l=0; l<bs; l++) { 1646 MatScalar v = *aa++; 1647 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1648 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1649 } 1650 } 1651 } 1652 } 1653 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1654 } 1655 PetscFunctionReturn(0); 1656 } 1657 1658 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1659 { 1660 PetscErrorCode ierr; 1661 PetscReal xl,yl,xr,yr,w,h; 1662 PetscDraw draw; 1663 PetscBool isnull; 1664 1665 PetscFunctionBegin; 1666 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1667 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1668 if (isnull) PetscFunctionReturn(0); 1669 1670 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1671 xr += w; yr += h; xl = -w; yl = -h; 1672 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1673 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1674 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1675 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1676 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1681 { 1682 PetscErrorCode ierr; 1683 PetscBool iascii,isbinary,isdraw; 1684 1685 PetscFunctionBegin; 1686 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1687 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1688 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1689 if (iascii) { 1690 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1691 } else if (isbinary) { 1692 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1693 } else if (isdraw) { 1694 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1695 } else { 1696 Mat B; 1697 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1698 ierr = MatView(B,viewer);CHKERRQ(ierr); 1699 ierr = MatDestroy(&B);CHKERRQ(ierr); 1700 } 1701 PetscFunctionReturn(0); 1702 } 1703 1704 1705 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1706 { 1707 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1708 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1709 PetscInt *ai = a->i,*ailen = a->ilen; 1710 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1711 MatScalar *ap,*aa = a->a; 1712 1713 PetscFunctionBegin; 1714 for (k=0; k<m; k++) { /* loop over rows */ 1715 row = im[k]; brow = row/bs; 1716 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1717 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1718 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1719 nrow = ailen[brow]; 1720 for (l=0; l<n; l++) { /* loop over columns */ 1721 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1722 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1723 col = in[l]; 1724 bcol = col/bs; 1725 cidx = col%bs; 1726 ridx = row%bs; 1727 high = nrow; 1728 low = 0; /* assume unsorted */ 1729 while (high-low > 5) { 1730 t = (low+high)/2; 1731 if (rp[t] > bcol) high = t; 1732 else low = t; 1733 } 1734 for (i=low; i<high; i++) { 1735 if (rp[i] > bcol) break; 1736 if (rp[i] == bcol) { 1737 *v++ = ap[bs2*i+bs*cidx+ridx]; 1738 goto finished; 1739 } 1740 } 1741 *v++ = 0.0; 1742 finished:; 1743 } 1744 } 1745 PetscFunctionReturn(0); 1746 } 1747 1748 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1749 { 1750 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1751 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1752 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1753 PetscErrorCode ierr; 1754 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1755 PetscBool roworiented=a->roworiented; 1756 const PetscScalar *value = v; 1757 MatScalar *ap,*aa = a->a,*bap; 1758 1759 PetscFunctionBegin; 1760 if (roworiented) { 1761 stepval = (n-1)*bs; 1762 } else { 1763 stepval = (m-1)*bs; 1764 } 1765 for (k=0; k<m; k++) { /* loop over added rows */ 1766 row = im[k]; 1767 if (row < 0) continue; 1768 #if defined(PETSC_USE_DEBUG) 1769 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1770 #endif 1771 rp = aj + ai[row]; 1772 ap = aa + bs2*ai[row]; 1773 rmax = imax[row]; 1774 nrow = ailen[row]; 1775 low = 0; 1776 high = nrow; 1777 for (l=0; l<n; l++) { /* loop over added columns */ 1778 if (in[l] < 0) continue; 1779 #if defined(PETSC_USE_DEBUG) 1780 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1781 #endif 1782 col = in[l]; 1783 if (roworiented) { 1784 value = v + (k*(stepval+bs) + l)*bs; 1785 } else { 1786 value = v + (l*(stepval+bs) + k)*bs; 1787 } 1788 if (col <= lastcol) low = 0; 1789 else high = nrow; 1790 lastcol = col; 1791 while (high-low > 7) { 1792 t = (low+high)/2; 1793 if (rp[t] > col) high = t; 1794 else low = t; 1795 } 1796 for (i=low; i<high; i++) { 1797 if (rp[i] > col) break; 1798 if (rp[i] == col) { 1799 bap = ap + bs2*i; 1800 if (roworiented) { 1801 if (is == ADD_VALUES) { 1802 for (ii=0; ii<bs; ii++,value+=stepval) { 1803 for (jj=ii; jj<bs2; jj+=bs) { 1804 bap[jj] += *value++; 1805 } 1806 } 1807 } else { 1808 for (ii=0; ii<bs; ii++,value+=stepval) { 1809 for (jj=ii; jj<bs2; jj+=bs) { 1810 bap[jj] = *value++; 1811 } 1812 } 1813 } 1814 } else { 1815 if (is == ADD_VALUES) { 1816 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1817 for (jj=0; jj<bs; jj++) { 1818 bap[jj] += value[jj]; 1819 } 1820 bap += bs; 1821 } 1822 } else { 1823 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1824 for (jj=0; jj<bs; jj++) { 1825 bap[jj] = value[jj]; 1826 } 1827 bap += bs; 1828 } 1829 } 1830 } 1831 goto noinsert2; 1832 } 1833 } 1834 if (nonew == 1) goto noinsert2; 1835 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1836 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1837 N = nrow++ - 1; high++; 1838 /* shift up all the later entries in this row */ 1839 for (ii=N; ii>=i; ii--) { 1840 rp[ii+1] = rp[ii]; 1841 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1842 } 1843 if (N >= i) { 1844 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1845 } 1846 rp[i] = col; 1847 bap = ap + bs2*i; 1848 if (roworiented) { 1849 for (ii=0; ii<bs; ii++,value+=stepval) { 1850 for (jj=ii; jj<bs2; jj+=bs) { 1851 bap[jj] = *value++; 1852 } 1853 } 1854 } else { 1855 for (ii=0; ii<bs; ii++,value+=stepval) { 1856 for (jj=0; jj<bs; jj++) { 1857 *bap++ = *value++; 1858 } 1859 } 1860 } 1861 noinsert2:; 1862 low = i; 1863 } 1864 ailen[row] = nrow; 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 1869 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1870 { 1871 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1872 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1873 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1874 PetscErrorCode ierr; 1875 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1876 MatScalar *aa = a->a,*ap; 1877 PetscReal ratio=0.6; 1878 1879 PetscFunctionBegin; 1880 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1881 1882 if (m) rmax = ailen[0]; 1883 for (i=1; i<mbs; i++) { 1884 /* move each row back by the amount of empty slots (fshift) before it*/ 1885 fshift += imax[i-1] - ailen[i-1]; 1886 rmax = PetscMax(rmax,ailen[i]); 1887 if (fshift) { 1888 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1889 N = ailen[i]; 1890 for (j=0; j<N; j++) { 1891 ip[j-fshift] = ip[j]; 1892 1893 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1894 } 1895 } 1896 ai[i] = ai[i-1] + ailen[i-1]; 1897 } 1898 if (mbs) { 1899 fshift += imax[mbs-1] - ailen[mbs-1]; 1900 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1901 } 1902 1903 /* reset ilen and imax for each row */ 1904 a->nonzerorowcnt = 0; 1905 for (i=0; i<mbs; i++) { 1906 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1907 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1908 } 1909 a->nz = ai[mbs]; 1910 1911 /* diagonals may have moved, so kill the diagonal pointers */ 1912 a->idiagvalid = PETSC_FALSE; 1913 if (fshift && a->diag) { 1914 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1915 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1916 a->diag = 0; 1917 } 1918 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1919 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1920 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1921 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1922 1923 A->info.mallocs += a->reallocs; 1924 a->reallocs = 0; 1925 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1926 a->rmax = rmax; 1927 1928 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1929 PetscFunctionReturn(0); 1930 } 1931 1932 /* 1933 This function returns an array of flags which indicate the locations of contiguous 1934 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1935 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1936 Assume: sizes should be long enough to hold all the values. 1937 */ 1938 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1939 { 1940 PetscInt i,j,k,row; 1941 PetscBool flg; 1942 1943 PetscFunctionBegin; 1944 for (i=0,j=0; i<n; j++) { 1945 row = idx[i]; 1946 if (row%bs!=0) { /* Not the begining of a block */ 1947 sizes[j] = 1; 1948 i++; 1949 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1950 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1951 i++; 1952 } else { /* Begining of the block, so check if the complete block exists */ 1953 flg = PETSC_TRUE; 1954 for (k=1; k<bs; k++) { 1955 if (row+k != idx[i+k]) { /* break in the block */ 1956 flg = PETSC_FALSE; 1957 break; 1958 } 1959 } 1960 if (flg) { /* No break in the bs */ 1961 sizes[j] = bs; 1962 i += bs; 1963 } else { 1964 sizes[j] = 1; 1965 i++; 1966 } 1967 } 1968 } 1969 *bs_max = j; 1970 PetscFunctionReturn(0); 1971 } 1972 1973 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1974 { 1975 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1976 PetscErrorCode ierr; 1977 PetscInt i,j,k,count,*rows; 1978 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1979 PetscScalar zero = 0.0; 1980 MatScalar *aa; 1981 const PetscScalar *xx; 1982 PetscScalar *bb; 1983 1984 PetscFunctionBegin; 1985 /* fix right hand side if needed */ 1986 if (x && b) { 1987 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1988 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1989 for (i=0; i<is_n; i++) { 1990 bb[is_idx[i]] = diag*xx[is_idx[i]]; 1991 } 1992 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1993 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1994 } 1995 1996 /* Make a copy of the IS and sort it */ 1997 /* allocate memory for rows,sizes */ 1998 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 1999 2000 /* copy IS values to rows, and sort them */ 2001 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2002 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2003 2004 if (baij->keepnonzeropattern) { 2005 for (i=0; i<is_n; i++) sizes[i] = 1; 2006 bs_max = is_n; 2007 } else { 2008 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2009 A->nonzerostate++; 2010 } 2011 2012 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2013 row = rows[j]; 2014 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2015 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2016 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2017 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2018 if (diag != (PetscScalar)0.0) { 2019 if (baij->ilen[row/bs] > 0) { 2020 baij->ilen[row/bs] = 1; 2021 baij->j[baij->i[row/bs]] = row/bs; 2022 2023 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2024 } 2025 /* Now insert all the diagonal values for this bs */ 2026 for (k=0; k<bs; k++) { 2027 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2028 } 2029 } else { /* (diag == 0.0) */ 2030 baij->ilen[row/bs] = 0; 2031 } /* end (diag == 0.0) */ 2032 } else { /* (sizes[i] != bs) */ 2033 #if defined(PETSC_USE_DEBUG) 2034 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2035 #endif 2036 for (k=0; k<count; k++) { 2037 aa[0] = zero; 2038 aa += bs; 2039 } 2040 if (diag != (PetscScalar)0.0) { 2041 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2042 } 2043 } 2044 } 2045 2046 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2047 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2048 PetscFunctionReturn(0); 2049 } 2050 2051 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2052 { 2053 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2054 PetscErrorCode ierr; 2055 PetscInt i,j,k,count; 2056 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2057 PetscScalar zero = 0.0; 2058 MatScalar *aa; 2059 const PetscScalar *xx; 2060 PetscScalar *bb; 2061 PetscBool *zeroed,vecs = PETSC_FALSE; 2062 2063 PetscFunctionBegin; 2064 /* fix right hand side if needed */ 2065 if (x && b) { 2066 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2067 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2068 vecs = PETSC_TRUE; 2069 } 2070 2071 /* zero the columns */ 2072 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2073 for (i=0; i<is_n; i++) { 2074 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2075 zeroed[is_idx[i]] = PETSC_TRUE; 2076 } 2077 for (i=0; i<A->rmap->N; i++) { 2078 if (!zeroed[i]) { 2079 row = i/bs; 2080 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2081 for (k=0; k<bs; k++) { 2082 col = bs*baij->j[j] + k; 2083 if (zeroed[col]) { 2084 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2085 if (vecs) bb[i] -= aa[0]*xx[col]; 2086 aa[0] = 0.0; 2087 } 2088 } 2089 } 2090 } else if (vecs) bb[i] = diag*xx[i]; 2091 } 2092 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2093 if (vecs) { 2094 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2095 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2096 } 2097 2098 /* zero the rows */ 2099 for (i=0; i<is_n; i++) { 2100 row = is_idx[i]; 2101 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2102 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2103 for (k=0; k<count; k++) { 2104 aa[0] = zero; 2105 aa += bs; 2106 } 2107 if (diag != (PetscScalar)0.0) { 2108 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2109 } 2110 } 2111 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2116 { 2117 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2118 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2119 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2120 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2121 PetscErrorCode ierr; 2122 PetscInt ridx,cidx,bs2=a->bs2; 2123 PetscBool roworiented=a->roworiented; 2124 MatScalar *ap,value,*aa=a->a,*bap; 2125 2126 PetscFunctionBegin; 2127 for (k=0; k<m; k++) { /* loop over added rows */ 2128 row = im[k]; 2129 brow = row/bs; 2130 if (row < 0) continue; 2131 #if defined(PETSC_USE_DEBUG) 2132 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2133 #endif 2134 rp = aj + ai[brow]; 2135 ap = aa + bs2*ai[brow]; 2136 rmax = imax[brow]; 2137 nrow = ailen[brow]; 2138 low = 0; 2139 high = nrow; 2140 for (l=0; l<n; l++) { /* loop over added columns */ 2141 if (in[l] < 0) continue; 2142 #if defined(PETSC_USE_DEBUG) 2143 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2144 #endif 2145 col = in[l]; bcol = col/bs; 2146 ridx = row % bs; cidx = col % bs; 2147 if (roworiented) { 2148 value = v[l + k*n]; 2149 } else { 2150 value = v[k + l*m]; 2151 } 2152 if (col <= lastcol) low = 0; else high = nrow; 2153 lastcol = col; 2154 while (high-low > 7) { 2155 t = (low+high)/2; 2156 if (rp[t] > bcol) high = t; 2157 else low = t; 2158 } 2159 for (i=low; i<high; i++) { 2160 if (rp[i] > bcol) break; 2161 if (rp[i] == bcol) { 2162 bap = ap + bs2*i + bs*cidx + ridx; 2163 if (is == ADD_VALUES) *bap += value; 2164 else *bap = value; 2165 goto noinsert1; 2166 } 2167 } 2168 if (nonew == 1) goto noinsert1; 2169 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2170 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2171 N = nrow++ - 1; high++; 2172 /* shift up all the later entries in this row */ 2173 for (ii=N; ii>=i; ii--) { 2174 rp[ii+1] = rp[ii]; 2175 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2176 } 2177 if (N>=i) { 2178 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2179 } 2180 rp[i] = bcol; 2181 ap[bs2*i + bs*cidx + ridx] = value; 2182 a->nz++; 2183 A->nonzerostate++; 2184 noinsert1:; 2185 low = i; 2186 } 2187 ailen[brow] = nrow; 2188 } 2189 PetscFunctionReturn(0); 2190 } 2191 2192 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2193 { 2194 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2195 Mat outA; 2196 PetscErrorCode ierr; 2197 PetscBool row_identity,col_identity; 2198 2199 PetscFunctionBegin; 2200 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2201 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2202 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2203 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2204 2205 outA = inA; 2206 inA->factortype = MAT_FACTOR_LU; 2207 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2208 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2209 2210 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2211 2212 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2213 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2214 a->row = row; 2215 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2216 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2217 a->col = col; 2218 2219 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2220 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2221 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2222 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2223 2224 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2225 if (!a->solve_work) { 2226 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2227 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2228 } 2229 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2234 { 2235 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2236 PetscInt i,nz,mbs; 2237 2238 PetscFunctionBegin; 2239 nz = baij->maxnz; 2240 mbs = baij->mbs; 2241 for (i=0; i<nz; i++) { 2242 baij->j[i] = indices[i]; 2243 } 2244 baij->nz = nz; 2245 for (i=0; i<mbs; i++) { 2246 baij->ilen[i] = baij->imax[i]; 2247 } 2248 PetscFunctionReturn(0); 2249 } 2250 2251 /*@ 2252 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2253 in the matrix. 2254 2255 Input Parameters: 2256 + mat - the SeqBAIJ matrix 2257 - indices - the column indices 2258 2259 Level: advanced 2260 2261 Notes: 2262 This can be called if you have precomputed the nonzero structure of the 2263 matrix and want to provide it to the matrix object to improve the performance 2264 of the MatSetValues() operation. 2265 2266 You MUST have set the correct numbers of nonzeros per row in the call to 2267 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2268 2269 MUST be called before any calls to MatSetValues(); 2270 2271 @*/ 2272 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2273 { 2274 PetscErrorCode ierr; 2275 2276 PetscFunctionBegin; 2277 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2278 PetscValidPointer(indices,2); 2279 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2280 PetscFunctionReturn(0); 2281 } 2282 2283 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2284 { 2285 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2286 PetscErrorCode ierr; 2287 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2288 PetscReal atmp; 2289 PetscScalar *x,zero = 0.0; 2290 MatScalar *aa; 2291 PetscInt ncols,brow,krow,kcol; 2292 2293 PetscFunctionBegin; 2294 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2295 bs = A->rmap->bs; 2296 aa = a->a; 2297 ai = a->i; 2298 aj = a->j; 2299 mbs = a->mbs; 2300 2301 ierr = VecSet(v,zero);CHKERRQ(ierr); 2302 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2303 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2304 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2305 for (i=0; i<mbs; i++) { 2306 ncols = ai[1] - ai[0]; ai++; 2307 brow = bs*i; 2308 for (j=0; j<ncols; j++) { 2309 for (kcol=0; kcol<bs; kcol++) { 2310 for (krow=0; krow<bs; krow++) { 2311 atmp = PetscAbsScalar(*aa);aa++; 2312 row = brow + krow; /* row index */ 2313 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2314 } 2315 } 2316 aj++; 2317 } 2318 } 2319 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2320 PetscFunctionReturn(0); 2321 } 2322 2323 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2324 { 2325 PetscErrorCode ierr; 2326 2327 PetscFunctionBegin; 2328 /* If the two matrices have the same copy implementation, use fast copy. */ 2329 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2330 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2331 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2332 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2333 2334 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2335 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2336 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2337 } else { 2338 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2339 } 2340 PetscFunctionReturn(0); 2341 } 2342 2343 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2344 { 2345 PetscErrorCode ierr; 2346 2347 PetscFunctionBegin; 2348 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2349 PetscFunctionReturn(0); 2350 } 2351 2352 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2353 { 2354 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2355 2356 PetscFunctionBegin; 2357 *array = a->a; 2358 PetscFunctionReturn(0); 2359 } 2360 2361 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2362 { 2363 PetscFunctionBegin; 2364 PetscFunctionReturn(0); 2365 } 2366 2367 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2368 { 2369 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2370 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2371 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 /* Set the number of nonzeros in the new matrix */ 2376 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2381 { 2382 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2383 PetscErrorCode ierr; 2384 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2385 PetscBLASInt one=1; 2386 2387 PetscFunctionBegin; 2388 if (str == SAME_NONZERO_PATTERN) { 2389 PetscScalar alpha = a; 2390 PetscBLASInt bnz; 2391 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2392 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2393 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2394 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2395 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2396 } else { 2397 Mat B; 2398 PetscInt *nnz; 2399 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2400 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2401 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2402 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2403 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2404 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2405 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2406 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2407 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2408 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2409 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2410 ierr = PetscFree(nnz);CHKERRQ(ierr); 2411 } 2412 PetscFunctionReturn(0); 2413 } 2414 2415 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2416 { 2417 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2418 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2419 MatScalar *aa = a->a; 2420 2421 PetscFunctionBegin; 2422 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2427 { 2428 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2429 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2430 MatScalar *aa = a->a; 2431 2432 PetscFunctionBegin; 2433 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 /* 2438 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2439 */ 2440 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2441 { 2442 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2443 PetscErrorCode ierr; 2444 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2445 PetscInt nz = a->i[m],row,*jj,mr,col; 2446 2447 PetscFunctionBegin; 2448 *nn = n; 2449 if (!ia) PetscFunctionReturn(0); 2450 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2451 else { 2452 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2453 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2454 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2455 jj = a->j; 2456 for (i=0; i<nz; i++) { 2457 collengths[jj[i]]++; 2458 } 2459 cia[0] = oshift; 2460 for (i=0; i<n; i++) { 2461 cia[i+1] = cia[i] + collengths[i]; 2462 } 2463 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2464 jj = a->j; 2465 for (row=0; row<m; row++) { 2466 mr = a->i[row+1] - a->i[row]; 2467 for (i=0; i<mr; i++) { 2468 col = *jj++; 2469 2470 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2471 } 2472 } 2473 ierr = PetscFree(collengths);CHKERRQ(ierr); 2474 *ia = cia; *ja = cja; 2475 } 2476 PetscFunctionReturn(0); 2477 } 2478 2479 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2480 { 2481 PetscErrorCode ierr; 2482 2483 PetscFunctionBegin; 2484 if (!ia) PetscFunctionReturn(0); 2485 ierr = PetscFree(*ia);CHKERRQ(ierr); 2486 ierr = PetscFree(*ja);CHKERRQ(ierr); 2487 PetscFunctionReturn(0); 2488 } 2489 2490 /* 2491 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2492 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2493 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2494 */ 2495 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2496 { 2497 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2498 PetscErrorCode ierr; 2499 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2500 PetscInt nz = a->i[m],row,*jj,mr,col; 2501 PetscInt *cspidx; 2502 2503 PetscFunctionBegin; 2504 *nn = n; 2505 if (!ia) PetscFunctionReturn(0); 2506 2507 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2508 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2509 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2510 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2511 jj = a->j; 2512 for (i=0; i<nz; i++) { 2513 collengths[jj[i]]++; 2514 } 2515 cia[0] = oshift; 2516 for (i=0; i<n; i++) { 2517 cia[i+1] = cia[i] + collengths[i]; 2518 } 2519 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2520 jj = a->j; 2521 for (row=0; row<m; row++) { 2522 mr = a->i[row+1] - a->i[row]; 2523 for (i=0; i<mr; i++) { 2524 col = *jj++; 2525 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2526 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2527 } 2528 } 2529 ierr = PetscFree(collengths);CHKERRQ(ierr); 2530 *ia = cia; *ja = cja; 2531 *spidx = cspidx; 2532 PetscFunctionReturn(0); 2533 } 2534 2535 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2536 { 2537 PetscErrorCode ierr; 2538 2539 PetscFunctionBegin; 2540 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2541 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2546 { 2547 PetscErrorCode ierr; 2548 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2549 2550 PetscFunctionBegin; 2551 if (!Y->preallocated || !aij->nz) { 2552 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2553 } 2554 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2555 PetscFunctionReturn(0); 2556 } 2557 2558 /* -------------------------------------------------------------------*/ 2559 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2560 MatGetRow_SeqBAIJ, 2561 MatRestoreRow_SeqBAIJ, 2562 MatMult_SeqBAIJ_N, 2563 /* 4*/ MatMultAdd_SeqBAIJ_N, 2564 MatMultTranspose_SeqBAIJ, 2565 MatMultTransposeAdd_SeqBAIJ, 2566 0, 2567 0, 2568 0, 2569 /* 10*/ 0, 2570 MatLUFactor_SeqBAIJ, 2571 0, 2572 0, 2573 MatTranspose_SeqBAIJ, 2574 /* 15*/ MatGetInfo_SeqBAIJ, 2575 MatEqual_SeqBAIJ, 2576 MatGetDiagonal_SeqBAIJ, 2577 MatDiagonalScale_SeqBAIJ, 2578 MatNorm_SeqBAIJ, 2579 /* 20*/ 0, 2580 MatAssemblyEnd_SeqBAIJ, 2581 MatSetOption_SeqBAIJ, 2582 MatZeroEntries_SeqBAIJ, 2583 /* 24*/ MatZeroRows_SeqBAIJ, 2584 0, 2585 0, 2586 0, 2587 0, 2588 /* 29*/ MatSetUp_SeqBAIJ, 2589 0, 2590 0, 2591 0, 2592 0, 2593 /* 34*/ MatDuplicate_SeqBAIJ, 2594 0, 2595 0, 2596 MatILUFactor_SeqBAIJ, 2597 0, 2598 /* 39*/ MatAXPY_SeqBAIJ, 2599 MatCreateSubMatrices_SeqBAIJ, 2600 MatIncreaseOverlap_SeqBAIJ, 2601 MatGetValues_SeqBAIJ, 2602 MatCopy_SeqBAIJ, 2603 /* 44*/ 0, 2604 MatScale_SeqBAIJ, 2605 MatShift_SeqBAIJ, 2606 0, 2607 MatZeroRowsColumns_SeqBAIJ, 2608 /* 49*/ 0, 2609 MatGetRowIJ_SeqBAIJ, 2610 MatRestoreRowIJ_SeqBAIJ, 2611 MatGetColumnIJ_SeqBAIJ, 2612 MatRestoreColumnIJ_SeqBAIJ, 2613 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2614 0, 2615 0, 2616 0, 2617 MatSetValuesBlocked_SeqBAIJ, 2618 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2619 MatDestroy_SeqBAIJ, 2620 MatView_SeqBAIJ, 2621 0, 2622 0, 2623 /* 64*/ 0, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2629 0, 2630 MatConvert_Basic, 2631 0, 2632 0, 2633 /* 74*/ 0, 2634 MatFDColoringApply_BAIJ, 2635 0, 2636 0, 2637 0, 2638 /* 79*/ 0, 2639 0, 2640 0, 2641 0, 2642 MatLoad_SeqBAIJ, 2643 /* 84*/ 0, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /* 89*/ 0, 2649 0, 2650 0, 2651 0, 2652 0, 2653 /* 94*/ 0, 2654 0, 2655 0, 2656 0, 2657 0, 2658 /* 99*/ 0, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*104*/ 0, 2664 MatRealPart_SeqBAIJ, 2665 MatImaginaryPart_SeqBAIJ, 2666 0, 2667 0, 2668 /*109*/ 0, 2669 0, 2670 0, 2671 0, 2672 MatMissingDiagonal_SeqBAIJ, 2673 /*114*/ 0, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*119*/ 0, 2679 0, 2680 MatMultHermitianTranspose_SeqBAIJ, 2681 MatMultHermitianTransposeAdd_SeqBAIJ, 2682 0, 2683 /*124*/ 0, 2684 0, 2685 MatInvertBlockDiagonal_SeqBAIJ, 2686 0, 2687 0, 2688 /*129*/ 0, 2689 0, 2690 0, 2691 0, 2692 0, 2693 /*134*/ 0, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*139*/ MatSetBlockSizes_Default, 2699 0, 2700 0, 2701 MatFDColoringSetUp_SeqXAIJ, 2702 0, 2703 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2704 }; 2705 2706 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2707 { 2708 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2709 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2710 PetscErrorCode ierr; 2711 2712 PetscFunctionBegin; 2713 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2714 2715 /* allocate space for values if not already there */ 2716 if (!aij->saved_values) { 2717 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2718 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2719 } 2720 2721 /* copy values over */ 2722 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2723 PetscFunctionReturn(0); 2724 } 2725 2726 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2727 { 2728 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2729 PetscErrorCode ierr; 2730 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2731 2732 PetscFunctionBegin; 2733 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2734 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2735 2736 /* copy values over */ 2737 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2738 PetscFunctionReturn(0); 2739 } 2740 2741 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2742 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2743 2744 static PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2745 { 2746 Mat_SeqBAIJ *b; 2747 PetscErrorCode ierr; 2748 PetscInt i,mbs,nbs,bs2; 2749 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2750 2751 PetscFunctionBegin; 2752 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2753 if (nz == MAT_SKIP_ALLOCATION) { 2754 skipallocation = PETSC_TRUE; 2755 nz = 0; 2756 } 2757 2758 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2759 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2760 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2761 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2762 2763 B->preallocated = PETSC_TRUE; 2764 2765 mbs = B->rmap->n/bs; 2766 nbs = B->cmap->n/bs; 2767 bs2 = bs*bs; 2768 2769 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2770 2771 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2772 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2773 if (nnz) { 2774 for (i=0; i<mbs; i++) { 2775 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2776 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2777 } 2778 } 2779 2780 b = (Mat_SeqBAIJ*)B->data; 2781 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2782 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2783 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2784 2785 if (!flg) { 2786 switch (bs) { 2787 case 1: 2788 B->ops->mult = MatMult_SeqBAIJ_1; 2789 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2790 break; 2791 case 2: 2792 B->ops->mult = MatMult_SeqBAIJ_2; 2793 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2794 break; 2795 case 3: 2796 B->ops->mult = MatMult_SeqBAIJ_3; 2797 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2798 break; 2799 case 4: 2800 B->ops->mult = MatMult_SeqBAIJ_4; 2801 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2802 break; 2803 case 5: 2804 B->ops->mult = MatMult_SeqBAIJ_5; 2805 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2806 break; 2807 case 6: 2808 B->ops->mult = MatMult_SeqBAIJ_6; 2809 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2810 break; 2811 case 7: 2812 B->ops->mult = MatMult_SeqBAIJ_7; 2813 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2814 break; 2815 case 11: 2816 B->ops->mult = MatMult_SeqBAIJ_11; 2817 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2818 break; 2819 case 15: 2820 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2821 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2822 break; 2823 default: 2824 B->ops->mult = MatMult_SeqBAIJ_N; 2825 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2826 break; 2827 } 2828 } 2829 B->ops->sor = MatSOR_SeqBAIJ; 2830 b->mbs = mbs; 2831 b->nbs = nbs; 2832 if (!skipallocation) { 2833 if (!b->imax) { 2834 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2835 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2836 2837 b->free_imax_ilen = PETSC_TRUE; 2838 } 2839 /* b->ilen will count nonzeros in each block row so far. */ 2840 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2841 if (!nnz) { 2842 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2843 else if (nz < 0) nz = 1; 2844 for (i=0; i<mbs; i++) b->imax[i] = nz; 2845 nz = nz*mbs; 2846 } else { 2847 nz = 0; 2848 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2849 } 2850 2851 /* allocate the matrix space */ 2852 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2853 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2854 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2855 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2856 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2857 2858 b->singlemalloc = PETSC_TRUE; 2859 b->i[0] = 0; 2860 for (i=1; i<mbs+1; i++) { 2861 b->i[i] = b->i[i-1] + b->imax[i-1]; 2862 } 2863 b->free_a = PETSC_TRUE; 2864 b->free_ij = PETSC_TRUE; 2865 } else { 2866 b->free_a = PETSC_FALSE; 2867 b->free_ij = PETSC_FALSE; 2868 } 2869 2870 b->bs2 = bs2; 2871 b->mbs = mbs; 2872 b->nz = 0; 2873 b->maxnz = nz; 2874 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2875 B->was_assembled = PETSC_FALSE; 2876 B->assembled = PETSC_FALSE; 2877 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2878 PetscFunctionReturn(0); 2879 } 2880 2881 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2882 { 2883 PetscInt i,m,nz,nz_max=0,*nnz; 2884 PetscScalar *values=0; 2885 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2886 PetscErrorCode ierr; 2887 2888 PetscFunctionBegin; 2889 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2890 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2891 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2892 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2893 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2894 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2895 m = B->rmap->n/bs; 2896 2897 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2898 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2899 for (i=0; i<m; i++) { 2900 nz = ii[i+1]- ii[i]; 2901 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2902 nz_max = PetscMax(nz_max, nz); 2903 nnz[i] = nz; 2904 } 2905 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2906 ierr = PetscFree(nnz);CHKERRQ(ierr); 2907 2908 values = (PetscScalar*)V; 2909 if (!values) { 2910 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2911 } 2912 for (i=0; i<m; i++) { 2913 PetscInt ncols = ii[i+1] - ii[i]; 2914 const PetscInt *icols = jj + ii[i]; 2915 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2916 if (!roworiented) { 2917 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2918 } else { 2919 PetscInt j; 2920 for (j=0; j<ncols; j++) { 2921 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2922 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2923 } 2924 } 2925 } 2926 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2927 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2928 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2929 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2930 PetscFunctionReturn(0); 2931 } 2932 2933 /*MC 2934 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2935 block sparse compressed row format. 2936 2937 Options Database Keys: 2938 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2939 2940 Level: beginner 2941 2942 .seealso: MatCreateSeqBAIJ() 2943 M*/ 2944 2945 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 2946 2947 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2948 { 2949 PetscErrorCode ierr; 2950 PetscMPIInt size; 2951 Mat_SeqBAIJ *b; 2952 2953 PetscFunctionBegin; 2954 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2955 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2956 2957 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2958 B->data = (void*)b; 2959 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2960 2961 b->row = 0; 2962 b->col = 0; 2963 b->icol = 0; 2964 b->reallocs = 0; 2965 b->saved_values = 0; 2966 2967 b->roworiented = PETSC_TRUE; 2968 b->nonew = 0; 2969 b->diag = 0; 2970 B->spptr = 0; 2971 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 2972 b->keepnonzeropattern = PETSC_FALSE; 2973 2974 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2975 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2976 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2977 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2978 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2979 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2980 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2981 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 2982 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 2983 #if defined(PETSC_HAVE_HYPRE) 2984 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 2985 #endif 2986 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 2991 { 2992 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 2993 PetscErrorCode ierr; 2994 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2995 2996 PetscFunctionBegin; 2997 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 2998 2999 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3000 c->imax = a->imax; 3001 c->ilen = a->ilen; 3002 c->free_imax_ilen = PETSC_FALSE; 3003 } else { 3004 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3005 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3006 for (i=0; i<mbs; i++) { 3007 c->imax[i] = a->imax[i]; 3008 c->ilen[i] = a->ilen[i]; 3009 } 3010 c->free_imax_ilen = PETSC_TRUE; 3011 } 3012 3013 /* allocate the matrix space */ 3014 if (mallocmatspace) { 3015 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3016 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3017 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3018 3019 c->i = a->i; 3020 c->j = a->j; 3021 c->singlemalloc = PETSC_FALSE; 3022 c->free_a = PETSC_TRUE; 3023 c->free_ij = PETSC_FALSE; 3024 c->parent = A; 3025 C->preallocated = PETSC_TRUE; 3026 C->assembled = PETSC_TRUE; 3027 3028 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3029 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3030 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3031 } else { 3032 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3033 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3034 3035 c->singlemalloc = PETSC_TRUE; 3036 c->free_a = PETSC_TRUE; 3037 c->free_ij = PETSC_TRUE; 3038 3039 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3040 if (mbs > 0) { 3041 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3042 if (cpvalues == MAT_COPY_VALUES) { 3043 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3044 } else { 3045 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3046 } 3047 } 3048 C->preallocated = PETSC_TRUE; 3049 C->assembled = PETSC_TRUE; 3050 } 3051 } 3052 3053 c->roworiented = a->roworiented; 3054 c->nonew = a->nonew; 3055 3056 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3057 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3058 3059 c->bs2 = a->bs2; 3060 c->mbs = a->mbs; 3061 c->nbs = a->nbs; 3062 3063 if (a->diag) { 3064 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3065 c->diag = a->diag; 3066 c->free_diag = PETSC_FALSE; 3067 } else { 3068 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3069 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3070 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3071 c->free_diag = PETSC_TRUE; 3072 } 3073 } else c->diag = 0; 3074 3075 c->nz = a->nz; 3076 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3077 c->solve_work = NULL; 3078 c->mult_work = NULL; 3079 c->sor_workt = NULL; 3080 c->sor_work = NULL; 3081 3082 c->compressedrow.use = a->compressedrow.use; 3083 c->compressedrow.nrows = a->compressedrow.nrows; 3084 if (a->compressedrow.use) { 3085 i = a->compressedrow.nrows; 3086 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3087 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3088 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3089 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3090 } else { 3091 c->compressedrow.use = PETSC_FALSE; 3092 c->compressedrow.i = NULL; 3093 c->compressedrow.rindex = NULL; 3094 } 3095 C->nonzerostate = A->nonzerostate; 3096 3097 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3098 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3099 PetscFunctionReturn(0); 3100 } 3101 3102 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3103 { 3104 PetscErrorCode ierr; 3105 3106 PetscFunctionBegin; 3107 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3108 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3109 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3110 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3111 PetscFunctionReturn(0); 3112 } 3113 3114 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3115 { 3116 Mat_SeqBAIJ *a; 3117 PetscErrorCode ierr; 3118 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3119 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3120 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3121 PetscInt *masked,nmask,tmp,bs2,ishift; 3122 PetscMPIInt size; 3123 int fd; 3124 PetscScalar *aa; 3125 MPI_Comm comm; 3126 3127 PetscFunctionBegin; 3128 /* force binary viewer to load .info file if it has not yet done so */ 3129 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3130 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3131 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3132 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3133 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3134 if (bs < 0) bs = 1; 3135 bs2 = bs*bs; 3136 3137 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3138 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3139 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3140 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3141 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3142 M = header[1]; N = header[2]; nz = header[3]; 3143 3144 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3145 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3146 3147 /* 3148 This code adds extra rows to make sure the number of rows is 3149 divisible by the blocksize 3150 */ 3151 mbs = M/bs; 3152 extra_rows = bs - M + bs*(mbs); 3153 if (extra_rows == bs) extra_rows = 0; 3154 else mbs++; 3155 if (extra_rows) { 3156 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3157 } 3158 3159 /* Set global sizes if not already set */ 3160 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3161 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3162 } else { /* Check if the matrix global sizes are correct */ 3163 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3164 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3165 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3166 } 3167 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3168 } 3169 3170 /* read in row lengths */ 3171 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3172 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3173 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3174 3175 /* read in column indices */ 3176 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3177 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3178 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3179 3180 /* loop over row lengths determining block row lengths */ 3181 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3182 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3183 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3184 rowcount = 0; 3185 nzcount = 0; 3186 for (i=0; i<mbs; i++) { 3187 nmask = 0; 3188 for (j=0; j<bs; j++) { 3189 kmax = rowlengths[rowcount]; 3190 for (k=0; k<kmax; k++) { 3191 tmp = jj[nzcount++]/bs; 3192 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3193 } 3194 rowcount++; 3195 } 3196 browlengths[i] += nmask; 3197 /* zero out the mask elements we set */ 3198 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3199 } 3200 3201 /* Do preallocation */ 3202 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3203 a = (Mat_SeqBAIJ*)newmat->data; 3204 3205 /* set matrix "i" values */ 3206 a->i[0] = 0; 3207 for (i=1; i<= mbs; i++) { 3208 a->i[i] = a->i[i-1] + browlengths[i-1]; 3209 a->ilen[i-1] = browlengths[i-1]; 3210 } 3211 a->nz = 0; 3212 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3213 3214 /* read in nonzero values */ 3215 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3216 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3217 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3218 3219 /* set "a" and "j" values into matrix */ 3220 nzcount = 0; jcount = 0; 3221 for (i=0; i<mbs; i++) { 3222 nzcountb = nzcount; 3223 nmask = 0; 3224 for (j=0; j<bs; j++) { 3225 kmax = rowlengths[i*bs+j]; 3226 for (k=0; k<kmax; k++) { 3227 tmp = jj[nzcount++]/bs; 3228 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3229 } 3230 } 3231 /* sort the masked values */ 3232 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3233 3234 /* set "j" values into matrix */ 3235 maskcount = 1; 3236 for (j=0; j<nmask; j++) { 3237 a->j[jcount++] = masked[j]; 3238 mask[masked[j]] = maskcount++; 3239 } 3240 /* set "a" values into matrix */ 3241 ishift = bs2*a->i[i]; 3242 for (j=0; j<bs; j++) { 3243 kmax = rowlengths[i*bs+j]; 3244 for (k=0; k<kmax; k++) { 3245 tmp = jj[nzcountb]/bs; 3246 block = mask[tmp] - 1; 3247 point = jj[nzcountb] - bs*tmp; 3248 idx = ishift + bs2*block + j + bs*point; 3249 a->a[idx] = (MatScalar)aa[nzcountb++]; 3250 } 3251 } 3252 /* zero out the mask elements we set */ 3253 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3254 } 3255 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3256 3257 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3258 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3259 ierr = PetscFree(aa);CHKERRQ(ierr); 3260 ierr = PetscFree(jj);CHKERRQ(ierr); 3261 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3262 3263 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 3268 /*@C 3269 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3270 compressed row) format. For good matrix assembly performance the 3271 user should preallocate the matrix storage by setting the parameter nz 3272 (or the array nnz). By setting these parameters accurately, performance 3273 during matrix assembly can be increased by more than a factor of 50. 3274 3275 Collective on MPI_Comm 3276 3277 Input Parameters: 3278 + comm - MPI communicator, set to PETSC_COMM_SELF 3279 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3280 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3281 . m - number of rows 3282 . n - number of columns 3283 . nz - number of nonzero blocks per block row (same for all rows) 3284 - nnz - array containing the number of nonzero blocks in the various block rows 3285 (possibly different for each block row) or NULL 3286 3287 Output Parameter: 3288 . A - the matrix 3289 3290 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3291 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3292 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3293 3294 Options Database Keys: 3295 . -mat_no_unroll - uses code that does not unroll the loops in the 3296 block calculations (much slower) 3297 . -mat_block_size - size of the blocks to use 3298 3299 Level: intermediate 3300 3301 Notes: 3302 The number of rows and columns must be divisible by blocksize. 3303 3304 If the nnz parameter is given then the nz parameter is ignored 3305 3306 A nonzero block is any block that as 1 or more nonzeros in it 3307 3308 The block AIJ format is fully compatible with standard Fortran 77 3309 storage. That is, the stored row and column indices can begin at 3310 either one (as in Fortran) or zero. See the users' manual for details. 3311 3312 Specify the preallocated storage with either nz or nnz (not both). 3313 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3314 allocation. See Users-Manual: ch_mat for details. 3315 matrices. 3316 3317 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3318 @*/ 3319 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3320 { 3321 PetscErrorCode ierr; 3322 3323 PetscFunctionBegin; 3324 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3325 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3326 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3327 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3328 PetscFunctionReturn(0); 3329 } 3330 3331 /*@C 3332 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3333 per row in the matrix. For good matrix assembly performance the 3334 user should preallocate the matrix storage by setting the parameter nz 3335 (or the array nnz). By setting these parameters accurately, performance 3336 during matrix assembly can be increased by more than a factor of 50. 3337 3338 Collective on MPI_Comm 3339 3340 Input Parameters: 3341 + B - the matrix 3342 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3343 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3344 . nz - number of block nonzeros per block row (same for all rows) 3345 - nnz - array containing the number of block nonzeros in the various block rows 3346 (possibly different for each block row) or NULL 3347 3348 Options Database Keys: 3349 . -mat_no_unroll - uses code that does not unroll the loops in the 3350 block calculations (much slower) 3351 . -mat_block_size - size of the blocks to use 3352 3353 Level: intermediate 3354 3355 Notes: 3356 If the nnz parameter is given then the nz parameter is ignored 3357 3358 You can call MatGetInfo() to get information on how effective the preallocation was; 3359 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3360 You can also run with the option -info and look for messages with the string 3361 malloc in them to see if additional memory allocation was needed. 3362 3363 The block AIJ format is fully compatible with standard Fortran 77 3364 storage. That is, the stored row and column indices can begin at 3365 either one (as in Fortran) or zero. See the users' manual for details. 3366 3367 Specify the preallocated storage with either nz or nnz (not both). 3368 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3369 allocation. See Users-Manual: ch_mat for details. 3370 3371 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3372 @*/ 3373 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3374 { 3375 PetscErrorCode ierr; 3376 3377 PetscFunctionBegin; 3378 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3379 PetscValidType(B,1); 3380 PetscValidLogicalCollectiveInt(B,bs,2); 3381 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3382 PetscFunctionReturn(0); 3383 } 3384 3385 /*@C 3386 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3387 (the default sequential PETSc format). 3388 3389 Collective on MPI_Comm 3390 3391 Input Parameters: 3392 + B - the matrix 3393 . i - the indices into j for the start of each local row (starts with zero) 3394 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3395 - v - optional values in the matrix 3396 3397 Level: developer 3398 3399 Notes: 3400 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3401 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3402 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3403 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3404 block column and the second index is over columns within a block. 3405 3406 .keywords: matrix, aij, compressed row, sparse 3407 3408 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3409 @*/ 3410 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3411 { 3412 PetscErrorCode ierr; 3413 3414 PetscFunctionBegin; 3415 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3416 PetscValidType(B,1); 3417 PetscValidLogicalCollectiveInt(B,bs,2); 3418 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3419 PetscFunctionReturn(0); 3420 } 3421 3422 3423 /*@ 3424 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3425 3426 Collective on MPI_Comm 3427 3428 Input Parameters: 3429 + comm - must be an MPI communicator of size 1 3430 . bs - size of block 3431 . m - number of rows 3432 . n - number of columns 3433 . i - row indices 3434 . j - column indices 3435 - a - matrix values 3436 3437 Output Parameter: 3438 . mat - the matrix 3439 3440 Level: advanced 3441 3442 Notes: 3443 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3444 once the matrix is destroyed 3445 3446 You cannot set new nonzero locations into this matrix, that will generate an error. 3447 3448 The i and j indices are 0 based 3449 3450 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3451 3452 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3453 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3454 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3455 with column-major ordering within blocks. 3456 3457 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3458 3459 @*/ 3460 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3461 { 3462 PetscErrorCode ierr; 3463 PetscInt ii; 3464 Mat_SeqBAIJ *baij; 3465 3466 PetscFunctionBegin; 3467 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3468 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3469 3470 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3471 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3472 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3473 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3474 baij = (Mat_SeqBAIJ*)(*mat)->data; 3475 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3476 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3477 3478 baij->i = i; 3479 baij->j = j; 3480 baij->a = a; 3481 3482 baij->singlemalloc = PETSC_FALSE; 3483 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3484 baij->free_a = PETSC_FALSE; 3485 baij->free_ij = PETSC_FALSE; 3486 3487 for (ii=0; ii<m; ii++) { 3488 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3489 #if defined(PETSC_USE_DEBUG) 3490 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3491 #endif 3492 } 3493 #if defined(PETSC_USE_DEBUG) 3494 for (ii=0; ii<baij->i[m]; ii++) { 3495 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3496 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3497 } 3498 #endif 3499 3500 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3501 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3502 PetscFunctionReturn(0); 3503 } 3504 3505 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3506 { 3507 PetscErrorCode ierr; 3508 PetscMPIInt size; 3509 3510 PetscFunctionBegin; 3511 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3512 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3513 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3514 } else { 3515 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3516 } 3517 PetscFunctionReturn(0); 3518 } 3519