1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "src/mat/impls/maij/maij.h" 21 #include "src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27 { 28 PetscErrorCode ierr; 29 PetscTruth ismpimaij,isseqmaij; 30 31 PetscFunctionBegin; 32 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34 if (ismpimaij) { 35 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36 37 *B = b->A; 38 } else if (isseqmaij) { 39 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40 41 *B = b->AIJ; 42 } else { 43 *B = A; 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMAIJRedimension" 50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51 { 52 PetscErrorCode ierr; 53 Mat Aij; 54 55 PetscFunctionBegin; 56 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatDestroy_SeqMAIJ" 63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 64 { 65 PetscErrorCode ierr; 66 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 67 68 PetscFunctionBegin; 69 if (b->AIJ) { 70 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 71 } 72 ierr = PetscFree(b);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatView_SeqMAIJ" 78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 79 { 80 PetscErrorCode ierr; 81 Mat B; 82 83 PetscFunctionBegin; 84 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 85 ierr = MatView(B,viewer);CHKERRQ(ierr); 86 ierr = MatDestroy(B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatView_MPIMAIJ" 92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 93 { 94 PetscErrorCode ierr; 95 Mat B; 96 97 PetscFunctionBegin; 98 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 99 ierr = MatView(B,viewer);CHKERRQ(ierr); 100 ierr = MatDestroy(B);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatDestroy_MPIMAIJ" 106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 107 { 108 PetscErrorCode ierr; 109 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 110 111 PetscFunctionBegin; 112 if (b->AIJ) { 113 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 114 } 115 if (b->OAIJ) { 116 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117 } 118 if (b->A) { 119 ierr = MatDestroy(b->A);CHKERRQ(ierr); 120 } 121 if (b->ctx) { 122 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123 } 124 if (b->w) { 125 ierr = VecDestroy(b->w);CHKERRQ(ierr); 126 } 127 ierr = PetscFree(b);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 /*MC 132 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 133 multicomponent problems, interpolating or restricting each component the same way independently. 134 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 135 136 Operations provided: 137 . MatMult 138 . MatMultTranspose 139 . MatMultAdd 140 . MatMultTransposeAdd 141 142 Level: advanced 143 144 .seealso: MatCreateSeqDense 145 M*/ 146 147 EXTERN_C_BEGIN 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatCreate_MAIJ" 150 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 151 { 152 PetscErrorCode ierr; 153 Mat_MPIMAIJ *b; 154 155 PetscFunctionBegin; 156 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 157 A->data = (void*)b; 158 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 159 A->factor = 0; 160 A->mapping = 0; 161 162 b->AIJ = 0; 163 b->dof = 0; 164 b->OAIJ = 0; 165 b->ctx = 0; 166 b->w = 0; 167 PetscFunctionReturn(0); 168 } 169 EXTERN_C_END 170 171 /* --------------------------------------------------------------------------------------*/ 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatMult_SeqMAIJ_2" 174 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 175 { 176 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 177 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 178 PetscScalar *x,*y,*v,sum1, sum2; 179 PetscErrorCode ierr; 180 PetscInt m = b->AIJ->m,*idx,*ii; 181 PetscInt n,i,jrow,j; 182 183 PetscFunctionBegin; 184 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 185 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 186 idx = a->j; 187 v = a->a; 188 ii = a->i; 189 190 for (i=0; i<m; i++) { 191 jrow = ii[i]; 192 n = ii[i+1] - jrow; 193 sum1 = 0.0; 194 sum2 = 0.0; 195 for (j=0; j<n; j++) { 196 sum1 += v[jrow]*x[2*idx[jrow]]; 197 sum2 += v[jrow]*x[2*idx[jrow]+1]; 198 jrow++; 199 } 200 y[2*i] = sum1; 201 y[2*i+1] = sum2; 202 } 203 204 ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 205 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 206 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 207 PetscFunctionReturn(0); 208 } 209 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 212 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 213 { 214 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 215 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 216 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 217 PetscErrorCode ierr; 218 PetscInt m = b->AIJ->m,n,i,*idx; 219 220 PetscFunctionBegin; 221 ierr = VecSet(yy,zero);CHKERRQ(ierr); 222 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 223 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 224 225 for (i=0; i<m; i++) { 226 idx = a->j + a->i[i] ; 227 v = a->a + a->i[i] ; 228 n = a->i[i+1] - a->i[i]; 229 alpha1 = x[2*i]; 230 alpha2 = x[2*i+1]; 231 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 232 } 233 ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->n);CHKERRQ(ierr); 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 241 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 242 { 243 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 244 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 245 PetscScalar *x,*y,*v,sum1, sum2; 246 PetscErrorCode ierr; 247 PetscInt m = b->AIJ->m,*idx,*ii; 248 PetscInt n,i,jrow,j; 249 250 PetscFunctionBegin; 251 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 252 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 253 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 254 idx = a->j; 255 v = a->a; 256 ii = a->i; 257 258 for (i=0; i<m; i++) { 259 jrow = ii[i]; 260 n = ii[i+1] - jrow; 261 sum1 = 0.0; 262 sum2 = 0.0; 263 for (j=0; j<n; j++) { 264 sum1 += v[jrow]*x[2*idx[jrow]]; 265 sum2 += v[jrow]*x[2*idx[jrow]+1]; 266 jrow++; 267 } 268 y[2*i] += sum1; 269 y[2*i+1] += sum2; 270 } 271 272 ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 273 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 274 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 279 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 280 { 281 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 282 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 283 PetscScalar *x,*y,*v,alpha1,alpha2; 284 PetscErrorCode ierr; 285 PetscInt m = b->AIJ->m,n,i,*idx; 286 287 PetscFunctionBegin; 288 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 289 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 290 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 291 292 for (i=0; i<m; i++) { 293 idx = a->j + a->i[i] ; 294 v = a->a + a->i[i] ; 295 n = a->i[i+1] - a->i[i]; 296 alpha1 = x[2*i]; 297 alpha2 = x[2*i+1]; 298 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 299 } 300 ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->n);CHKERRQ(ierr); 301 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 302 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 /* --------------------------------------------------------------------------------------*/ 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatMult_SeqMAIJ_3" 308 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 309 { 310 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 311 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 312 PetscScalar *x,*y,*v,sum1, sum2, sum3; 313 PetscErrorCode ierr; 314 PetscInt m = b->AIJ->m,*idx,*ii; 315 PetscInt n,i,jrow,j; 316 317 PetscFunctionBegin; 318 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 319 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 320 idx = a->j; 321 v = a->a; 322 ii = a->i; 323 324 for (i=0; i<m; i++) { 325 jrow = ii[i]; 326 n = ii[i+1] - jrow; 327 sum1 = 0.0; 328 sum2 = 0.0; 329 sum3 = 0.0; 330 for (j=0; j<n; j++) { 331 sum1 += v[jrow]*x[3*idx[jrow]]; 332 sum2 += v[jrow]*x[3*idx[jrow]+1]; 333 sum3 += v[jrow]*x[3*idx[jrow]+2]; 334 jrow++; 335 } 336 y[3*i] = sum1; 337 y[3*i+1] = sum2; 338 y[3*i+2] = sum3; 339 } 340 341 ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr); 342 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 343 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 349 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 350 { 351 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 352 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 353 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 354 PetscErrorCode ierr; 355 PetscInt m = b->AIJ->m,n,i,*idx; 356 357 PetscFunctionBegin; 358 ierr = VecSet(yy,zero);CHKERRQ(ierr); 359 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 360 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 361 362 for (i=0; i<m; i++) { 363 idx = a->j + a->i[i]; 364 v = a->a + a->i[i]; 365 n = a->i[i+1] - a->i[i]; 366 alpha1 = x[3*i]; 367 alpha2 = x[3*i+1]; 368 alpha3 = x[3*i+2]; 369 while (n-->0) { 370 y[3*(*idx)] += alpha1*(*v); 371 y[3*(*idx)+1] += alpha2*(*v); 372 y[3*(*idx)+2] += alpha3*(*v); 373 idx++; v++; 374 } 375 } 376 ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->n);CHKERRQ(ierr); 377 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 378 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 384 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 385 { 386 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 387 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 388 PetscScalar *x,*y,*v,sum1, sum2, sum3; 389 PetscErrorCode ierr; 390 PetscInt m = b->AIJ->m,*idx,*ii; 391 PetscInt n,i,jrow,j; 392 393 PetscFunctionBegin; 394 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 395 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 396 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 397 idx = a->j; 398 v = a->a; 399 ii = a->i; 400 401 for (i=0; i<m; i++) { 402 jrow = ii[i]; 403 n = ii[i+1] - jrow; 404 sum1 = 0.0; 405 sum2 = 0.0; 406 sum3 = 0.0; 407 for (j=0; j<n; j++) { 408 sum1 += v[jrow]*x[3*idx[jrow]]; 409 sum2 += v[jrow]*x[3*idx[jrow]+1]; 410 sum3 += v[jrow]*x[3*idx[jrow]+2]; 411 jrow++; 412 } 413 y[3*i] += sum1; 414 y[3*i+1] += sum2; 415 y[3*i+2] += sum3; 416 } 417 418 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 419 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 420 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 425 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 426 { 427 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 428 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 429 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 430 PetscErrorCode ierr; 431 PetscInt m = b->AIJ->m,n,i,*idx; 432 433 PetscFunctionBegin; 434 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 435 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 436 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 437 for (i=0; i<m; i++) { 438 idx = a->j + a->i[i] ; 439 v = a->a + a->i[i] ; 440 n = a->i[i+1] - a->i[i]; 441 alpha1 = x[3*i]; 442 alpha2 = x[3*i+1]; 443 alpha3 = x[3*i+2]; 444 while (n-->0) { 445 y[3*(*idx)] += alpha1*(*v); 446 y[3*(*idx)+1] += alpha2*(*v); 447 y[3*(*idx)+2] += alpha3*(*v); 448 idx++; v++; 449 } 450 } 451 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 452 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 453 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 /* ------------------------------------------------------------------------------*/ 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatMult_SeqMAIJ_4" 460 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 461 { 462 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 464 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 465 PetscErrorCode ierr; 466 PetscInt m = b->AIJ->m,*idx,*ii; 467 PetscInt n,i,jrow,j; 468 469 PetscFunctionBegin; 470 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 471 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 472 idx = a->j; 473 v = a->a; 474 ii = a->i; 475 476 for (i=0; i<m; i++) { 477 jrow = ii[i]; 478 n = ii[i+1] - jrow; 479 sum1 = 0.0; 480 sum2 = 0.0; 481 sum3 = 0.0; 482 sum4 = 0.0; 483 for (j=0; j<n; j++) { 484 sum1 += v[jrow]*x[4*idx[jrow]]; 485 sum2 += v[jrow]*x[4*idx[jrow]+1]; 486 sum3 += v[jrow]*x[4*idx[jrow]+2]; 487 sum4 += v[jrow]*x[4*idx[jrow]+3]; 488 jrow++; 489 } 490 y[4*i] = sum1; 491 y[4*i+1] = sum2; 492 y[4*i+2] = sum3; 493 y[4*i+3] = sum4; 494 } 495 496 ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 497 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 498 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #undef __FUNCT__ 503 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 504 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 505 { 506 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 507 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 508 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 509 PetscErrorCode ierr; 510 PetscInt m = b->AIJ->m,n,i,*idx; 511 512 PetscFunctionBegin; 513 ierr = VecSet(yy,zero);CHKERRQ(ierr); 514 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 515 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 516 for (i=0; i<m; i++) { 517 idx = a->j + a->i[i] ; 518 v = a->a + a->i[i] ; 519 n = a->i[i+1] - a->i[i]; 520 alpha1 = x[4*i]; 521 alpha2 = x[4*i+1]; 522 alpha3 = x[4*i+2]; 523 alpha4 = x[4*i+3]; 524 while (n-->0) { 525 y[4*(*idx)] += alpha1*(*v); 526 y[4*(*idx)+1] += alpha2*(*v); 527 y[4*(*idx)+2] += alpha3*(*v); 528 y[4*(*idx)+3] += alpha4*(*v); 529 idx++; v++; 530 } 531 } 532 ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->n);CHKERRQ(ierr); 533 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 534 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 535 PetscFunctionReturn(0); 536 } 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 540 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 541 { 542 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 543 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 544 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 545 PetscErrorCode ierr; 546 PetscInt m = b->AIJ->m,*idx,*ii; 547 PetscInt n,i,jrow,j; 548 549 PetscFunctionBegin; 550 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 551 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 552 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 553 idx = a->j; 554 v = a->a; 555 ii = a->i; 556 557 for (i=0; i<m; i++) { 558 jrow = ii[i]; 559 n = ii[i+1] - jrow; 560 sum1 = 0.0; 561 sum2 = 0.0; 562 sum3 = 0.0; 563 sum4 = 0.0; 564 for (j=0; j<n; j++) { 565 sum1 += v[jrow]*x[4*idx[jrow]]; 566 sum2 += v[jrow]*x[4*idx[jrow]+1]; 567 sum3 += v[jrow]*x[4*idx[jrow]+2]; 568 sum4 += v[jrow]*x[4*idx[jrow]+3]; 569 jrow++; 570 } 571 y[4*i] += sum1; 572 y[4*i+1] += sum2; 573 y[4*i+2] += sum3; 574 y[4*i+3] += sum4; 575 } 576 577 ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 578 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 579 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 584 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 585 { 586 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 587 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 588 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 589 PetscErrorCode ierr; 590 PetscInt m = b->AIJ->m,n,i,*idx; 591 592 PetscFunctionBegin; 593 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 594 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 595 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 596 597 for (i=0; i<m; i++) { 598 idx = a->j + a->i[i] ; 599 v = a->a + a->i[i] ; 600 n = a->i[i+1] - a->i[i]; 601 alpha1 = x[4*i]; 602 alpha2 = x[4*i+1]; 603 alpha3 = x[4*i+2]; 604 alpha4 = x[4*i+3]; 605 while (n-->0) { 606 y[4*(*idx)] += alpha1*(*v); 607 y[4*(*idx)+1] += alpha2*(*v); 608 y[4*(*idx)+2] += alpha3*(*v); 609 y[4*(*idx)+3] += alpha4*(*v); 610 idx++; v++; 611 } 612 } 613 ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->n);CHKERRQ(ierr); 614 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 615 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 /* ------------------------------------------------------------------------------*/ 619 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatMult_SeqMAIJ_5" 622 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 623 { 624 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 625 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 626 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 627 PetscErrorCode ierr; 628 PetscInt m = b->AIJ->m,*idx,*ii; 629 PetscInt n,i,jrow,j; 630 631 PetscFunctionBegin; 632 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 633 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 634 idx = a->j; 635 v = a->a; 636 ii = a->i; 637 638 for (i=0; i<m; i++) { 639 jrow = ii[i]; 640 n = ii[i+1] - jrow; 641 sum1 = 0.0; 642 sum2 = 0.0; 643 sum3 = 0.0; 644 sum4 = 0.0; 645 sum5 = 0.0; 646 for (j=0; j<n; j++) { 647 sum1 += v[jrow]*x[5*idx[jrow]]; 648 sum2 += v[jrow]*x[5*idx[jrow]+1]; 649 sum3 += v[jrow]*x[5*idx[jrow]+2]; 650 sum4 += v[jrow]*x[5*idx[jrow]+3]; 651 sum5 += v[jrow]*x[5*idx[jrow]+4]; 652 jrow++; 653 } 654 y[5*i] = sum1; 655 y[5*i+1] = sum2; 656 y[5*i+2] = sum3; 657 y[5*i+3] = sum4; 658 y[5*i+4] = sum5; 659 } 660 661 ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr); 662 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 663 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 669 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 670 { 671 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 672 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 673 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 674 PetscErrorCode ierr; 675 PetscInt m = b->AIJ->m,n,i,*idx; 676 677 PetscFunctionBegin; 678 ierr = VecSet(yy,zero);CHKERRQ(ierr); 679 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 680 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 681 682 for (i=0; i<m; i++) { 683 idx = a->j + a->i[i] ; 684 v = a->a + a->i[i] ; 685 n = a->i[i+1] - a->i[i]; 686 alpha1 = x[5*i]; 687 alpha2 = x[5*i+1]; 688 alpha3 = x[5*i+2]; 689 alpha4 = x[5*i+3]; 690 alpha5 = x[5*i+4]; 691 while (n-->0) { 692 y[5*(*idx)] += alpha1*(*v); 693 y[5*(*idx)+1] += alpha2*(*v); 694 y[5*(*idx)+2] += alpha3*(*v); 695 y[5*(*idx)+3] += alpha4*(*v); 696 y[5*(*idx)+4] += alpha5*(*v); 697 idx++; v++; 698 } 699 } 700 ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->n);CHKERRQ(ierr); 701 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 702 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNCT__ 707 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 708 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 709 { 710 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 712 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 713 PetscErrorCode ierr; 714 PetscInt m = b->AIJ->m,*idx,*ii; 715 PetscInt n,i,jrow,j; 716 717 PetscFunctionBegin; 718 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 719 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 720 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 721 idx = a->j; 722 v = a->a; 723 ii = a->i; 724 725 for (i=0; i<m; i++) { 726 jrow = ii[i]; 727 n = ii[i+1] - jrow; 728 sum1 = 0.0; 729 sum2 = 0.0; 730 sum3 = 0.0; 731 sum4 = 0.0; 732 sum5 = 0.0; 733 for (j=0; j<n; j++) { 734 sum1 += v[jrow]*x[5*idx[jrow]]; 735 sum2 += v[jrow]*x[5*idx[jrow]+1]; 736 sum3 += v[jrow]*x[5*idx[jrow]+2]; 737 sum4 += v[jrow]*x[5*idx[jrow]+3]; 738 sum5 += v[jrow]*x[5*idx[jrow]+4]; 739 jrow++; 740 } 741 y[5*i] += sum1; 742 y[5*i+1] += sum2; 743 y[5*i+2] += sum3; 744 y[5*i+3] += sum4; 745 y[5*i+4] += sum5; 746 } 747 748 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 749 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 750 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 756 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 757 { 758 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 759 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 760 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 761 PetscErrorCode ierr; 762 PetscInt m = b->AIJ->m,n,i,*idx; 763 764 PetscFunctionBegin; 765 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 766 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 767 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 768 769 for (i=0; i<m; i++) { 770 idx = a->j + a->i[i] ; 771 v = a->a + a->i[i] ; 772 n = a->i[i+1] - a->i[i]; 773 alpha1 = x[5*i]; 774 alpha2 = x[5*i+1]; 775 alpha3 = x[5*i+2]; 776 alpha4 = x[5*i+3]; 777 alpha5 = x[5*i+4]; 778 while (n-->0) { 779 y[5*(*idx)] += alpha1*(*v); 780 y[5*(*idx)+1] += alpha2*(*v); 781 y[5*(*idx)+2] += alpha3*(*v); 782 y[5*(*idx)+3] += alpha4*(*v); 783 y[5*(*idx)+4] += alpha5*(*v); 784 idx++; v++; 785 } 786 } 787 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 788 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 789 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 790 PetscFunctionReturn(0); 791 } 792 793 /* ------------------------------------------------------------------------------*/ 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatMult_SeqMAIJ_6" 796 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 797 { 798 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 799 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 800 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 801 PetscErrorCode ierr; 802 PetscInt m = b->AIJ->m,*idx,*ii; 803 PetscInt n,i,jrow,j; 804 805 PetscFunctionBegin; 806 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 807 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 808 idx = a->j; 809 v = a->a; 810 ii = a->i; 811 812 for (i=0; i<m; i++) { 813 jrow = ii[i]; 814 n = ii[i+1] - jrow; 815 sum1 = 0.0; 816 sum2 = 0.0; 817 sum3 = 0.0; 818 sum4 = 0.0; 819 sum5 = 0.0; 820 sum6 = 0.0; 821 for (j=0; j<n; j++) { 822 sum1 += v[jrow]*x[6*idx[jrow]]; 823 sum2 += v[jrow]*x[6*idx[jrow]+1]; 824 sum3 += v[jrow]*x[6*idx[jrow]+2]; 825 sum4 += v[jrow]*x[6*idx[jrow]+3]; 826 sum5 += v[jrow]*x[6*idx[jrow]+4]; 827 sum6 += v[jrow]*x[6*idx[jrow]+5]; 828 jrow++; 829 } 830 y[6*i] = sum1; 831 y[6*i+1] = sum2; 832 y[6*i+2] = sum3; 833 y[6*i+3] = sum4; 834 y[6*i+4] = sum5; 835 y[6*i+5] = sum6; 836 } 837 838 ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr); 839 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 840 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 841 PetscFunctionReturn(0); 842 } 843 844 #undef __FUNCT__ 845 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 846 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 847 { 848 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 849 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 850 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 851 PetscErrorCode ierr; 852 PetscInt m = b->AIJ->m,n,i,*idx; 853 854 PetscFunctionBegin; 855 ierr = VecSet(yy,zero);CHKERRQ(ierr); 856 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 857 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 858 859 for (i=0; i<m; i++) { 860 idx = a->j + a->i[i] ; 861 v = a->a + a->i[i] ; 862 n = a->i[i+1] - a->i[i]; 863 alpha1 = x[6*i]; 864 alpha2 = x[6*i+1]; 865 alpha3 = x[6*i+2]; 866 alpha4 = x[6*i+3]; 867 alpha5 = x[6*i+4]; 868 alpha6 = x[6*i+5]; 869 while (n-->0) { 870 y[6*(*idx)] += alpha1*(*v); 871 y[6*(*idx)+1] += alpha2*(*v); 872 y[6*(*idx)+2] += alpha3*(*v); 873 y[6*(*idx)+3] += alpha4*(*v); 874 y[6*(*idx)+4] += alpha5*(*v); 875 y[6*(*idx)+5] += alpha6*(*v); 876 idx++; v++; 877 } 878 } 879 ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->n);CHKERRQ(ierr); 880 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 881 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } 884 885 #undef __FUNCT__ 886 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 887 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 888 { 889 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 890 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 891 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 892 PetscErrorCode ierr; 893 PetscInt m = b->AIJ->m,*idx,*ii; 894 PetscInt n,i,jrow,j; 895 896 PetscFunctionBegin; 897 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 898 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 899 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 900 idx = a->j; 901 v = a->a; 902 ii = a->i; 903 904 for (i=0; i<m; i++) { 905 jrow = ii[i]; 906 n = ii[i+1] - jrow; 907 sum1 = 0.0; 908 sum2 = 0.0; 909 sum3 = 0.0; 910 sum4 = 0.0; 911 sum5 = 0.0; 912 sum6 = 0.0; 913 for (j=0; j<n; j++) { 914 sum1 += v[jrow]*x[6*idx[jrow]]; 915 sum2 += v[jrow]*x[6*idx[jrow]+1]; 916 sum3 += v[jrow]*x[6*idx[jrow]+2]; 917 sum4 += v[jrow]*x[6*idx[jrow]+3]; 918 sum5 += v[jrow]*x[6*idx[jrow]+4]; 919 sum6 += v[jrow]*x[6*idx[jrow]+5]; 920 jrow++; 921 } 922 y[6*i] += sum1; 923 y[6*i+1] += sum2; 924 y[6*i+2] += sum3; 925 y[6*i+3] += sum4; 926 y[6*i+4] += sum5; 927 y[6*i+5] += sum6; 928 } 929 930 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 931 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 932 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 938 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 939 { 940 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 941 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 942 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 943 PetscErrorCode ierr; 944 PetscInt m = b->AIJ->m,n,i,*idx; 945 946 PetscFunctionBegin; 947 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 948 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 949 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 950 951 for (i=0; i<m; i++) { 952 idx = a->j + a->i[i] ; 953 v = a->a + a->i[i] ; 954 n = a->i[i+1] - a->i[i]; 955 alpha1 = x[6*i]; 956 alpha2 = x[6*i+1]; 957 alpha3 = x[6*i+2]; 958 alpha4 = x[6*i+3]; 959 alpha5 = x[6*i+4]; 960 alpha6 = x[6*i+5]; 961 while (n-->0) { 962 y[6*(*idx)] += alpha1*(*v); 963 y[6*(*idx)+1] += alpha2*(*v); 964 y[6*(*idx)+2] += alpha3*(*v); 965 y[6*(*idx)+3] += alpha4*(*v); 966 y[6*(*idx)+4] += alpha5*(*v); 967 y[6*(*idx)+5] += alpha6*(*v); 968 idx++; v++; 969 } 970 } 971 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 972 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 973 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 974 PetscFunctionReturn(0); 975 } 976 977 /* ------------------------------------------------------------------------------*/ 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatMult_SeqMAIJ_7" 980 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 981 { 982 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 983 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 984 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 985 PetscErrorCode ierr; 986 PetscInt m = b->AIJ->m,*idx,*ii; 987 PetscInt n,i,jrow,j; 988 989 PetscFunctionBegin; 990 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 991 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 992 idx = a->j; 993 v = a->a; 994 ii = a->i; 995 996 for (i=0; i<m; i++) { 997 jrow = ii[i]; 998 n = ii[i+1] - jrow; 999 sum1 = 0.0; 1000 sum2 = 0.0; 1001 sum3 = 0.0; 1002 sum4 = 0.0; 1003 sum5 = 0.0; 1004 sum6 = 0.0; 1005 sum7 = 0.0; 1006 for (j=0; j<n; j++) { 1007 sum1 += v[jrow]*x[7*idx[jrow]]; 1008 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1009 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1010 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1011 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1012 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1013 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1014 jrow++; 1015 } 1016 y[7*i] = sum1; 1017 y[7*i+1] = sum2; 1018 y[7*i+2] = sum3; 1019 y[7*i+3] = sum4; 1020 y[7*i+4] = sum5; 1021 y[7*i+5] = sum6; 1022 y[7*i+6] = sum7; 1023 } 1024 1025 ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr); 1026 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1027 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1033 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1034 { 1035 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1036 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1037 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1038 PetscErrorCode ierr; 1039 PetscInt m = b->AIJ->m,n,i,*idx; 1040 1041 PetscFunctionBegin; 1042 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1043 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1044 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1045 1046 for (i=0; i<m; i++) { 1047 idx = a->j + a->i[i] ; 1048 v = a->a + a->i[i] ; 1049 n = a->i[i+1] - a->i[i]; 1050 alpha1 = x[7*i]; 1051 alpha2 = x[7*i+1]; 1052 alpha3 = x[7*i+2]; 1053 alpha4 = x[7*i+3]; 1054 alpha5 = x[7*i+4]; 1055 alpha6 = x[7*i+5]; 1056 alpha7 = x[7*i+6]; 1057 while (n-->0) { 1058 y[7*(*idx)] += alpha1*(*v); 1059 y[7*(*idx)+1] += alpha2*(*v); 1060 y[7*(*idx)+2] += alpha3*(*v); 1061 y[7*(*idx)+3] += alpha4*(*v); 1062 y[7*(*idx)+4] += alpha5*(*v); 1063 y[7*(*idx)+5] += alpha6*(*v); 1064 y[7*(*idx)+6] += alpha7*(*v); 1065 idx++; v++; 1066 } 1067 } 1068 ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->n);CHKERRQ(ierr); 1069 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1070 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1076 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1077 { 1078 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1079 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1080 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1081 PetscErrorCode ierr; 1082 PetscInt m = b->AIJ->m,*idx,*ii; 1083 PetscInt n,i,jrow,j; 1084 1085 PetscFunctionBegin; 1086 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1087 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1088 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1089 idx = a->j; 1090 v = a->a; 1091 ii = a->i; 1092 1093 for (i=0; i<m; i++) { 1094 jrow = ii[i]; 1095 n = ii[i+1] - jrow; 1096 sum1 = 0.0; 1097 sum2 = 0.0; 1098 sum3 = 0.0; 1099 sum4 = 0.0; 1100 sum5 = 0.0; 1101 sum6 = 0.0; 1102 sum7 = 0.0; 1103 for (j=0; j<n; j++) { 1104 sum1 += v[jrow]*x[7*idx[jrow]]; 1105 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1106 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1107 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1108 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1109 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1110 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1111 jrow++; 1112 } 1113 y[7*i] += sum1; 1114 y[7*i+1] += sum2; 1115 y[7*i+2] += sum3; 1116 y[7*i+3] += sum4; 1117 y[7*i+4] += sum5; 1118 y[7*i+5] += sum6; 1119 y[7*i+6] += sum7; 1120 } 1121 1122 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1123 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1124 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1130 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1131 { 1132 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1133 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1134 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1135 PetscErrorCode ierr; 1136 PetscInt m = b->AIJ->m,n,i,*idx; 1137 1138 PetscFunctionBegin; 1139 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1141 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1142 for (i=0; i<m; i++) { 1143 idx = a->j + a->i[i] ; 1144 v = a->a + a->i[i] ; 1145 n = a->i[i+1] - a->i[i]; 1146 alpha1 = x[7*i]; 1147 alpha2 = x[7*i+1]; 1148 alpha3 = x[7*i+2]; 1149 alpha4 = x[7*i+3]; 1150 alpha5 = x[7*i+4]; 1151 alpha6 = x[7*i+5]; 1152 alpha7 = x[7*i+6]; 1153 while (n-->0) { 1154 y[7*(*idx)] += alpha1*(*v); 1155 y[7*(*idx)+1] += alpha2*(*v); 1156 y[7*(*idx)+2] += alpha3*(*v); 1157 y[7*(*idx)+3] += alpha4*(*v); 1158 y[7*(*idx)+4] += alpha5*(*v); 1159 y[7*(*idx)+5] += alpha6*(*v); 1160 y[7*(*idx)+6] += alpha7*(*v); 1161 idx++; v++; 1162 } 1163 } 1164 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1165 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1166 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1172 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1173 { 1174 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1175 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1176 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1177 PetscErrorCode ierr; 1178 PetscInt m = b->AIJ->m,*idx,*ii; 1179 PetscInt n,i,jrow,j; 1180 1181 PetscFunctionBegin; 1182 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1183 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1184 idx = a->j; 1185 v = a->a; 1186 ii = a->i; 1187 1188 for (i=0; i<m; i++) { 1189 jrow = ii[i]; 1190 n = ii[i+1] - jrow; 1191 sum1 = 0.0; 1192 sum2 = 0.0; 1193 sum3 = 0.0; 1194 sum4 = 0.0; 1195 sum5 = 0.0; 1196 sum6 = 0.0; 1197 sum7 = 0.0; 1198 sum8 = 0.0; 1199 for (j=0; j<n; j++) { 1200 sum1 += v[jrow]*x[8*idx[jrow]]; 1201 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1202 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1203 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1204 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1205 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1206 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1207 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1208 jrow++; 1209 } 1210 y[8*i] = sum1; 1211 y[8*i+1] = sum2; 1212 y[8*i+2] = sum3; 1213 y[8*i+3] = sum4; 1214 y[8*i+4] = sum5; 1215 y[8*i+5] = sum6; 1216 y[8*i+6] = sum7; 1217 y[8*i+7] = sum8; 1218 } 1219 1220 ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr); 1221 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1222 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1228 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1229 { 1230 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1231 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1232 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1233 PetscErrorCode ierr; 1234 PetscInt m = b->AIJ->m,n,i,*idx; 1235 1236 PetscFunctionBegin; 1237 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1238 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1239 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1240 1241 for (i=0; i<m; i++) { 1242 idx = a->j + a->i[i] ; 1243 v = a->a + a->i[i] ; 1244 n = a->i[i+1] - a->i[i]; 1245 alpha1 = x[8*i]; 1246 alpha2 = x[8*i+1]; 1247 alpha3 = x[8*i+2]; 1248 alpha4 = x[8*i+3]; 1249 alpha5 = x[8*i+4]; 1250 alpha6 = x[8*i+5]; 1251 alpha7 = x[8*i+6]; 1252 alpha8 = x[8*i+7]; 1253 while (n-->0) { 1254 y[8*(*idx)] += alpha1*(*v); 1255 y[8*(*idx)+1] += alpha2*(*v); 1256 y[8*(*idx)+2] += alpha3*(*v); 1257 y[8*(*idx)+3] += alpha4*(*v); 1258 y[8*(*idx)+4] += alpha5*(*v); 1259 y[8*(*idx)+5] += alpha6*(*v); 1260 y[8*(*idx)+6] += alpha7*(*v); 1261 y[8*(*idx)+7] += alpha8*(*v); 1262 idx++; v++; 1263 } 1264 } 1265 ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->n);CHKERRQ(ierr); 1266 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1267 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1273 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1274 { 1275 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1276 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1277 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1278 PetscErrorCode ierr; 1279 PetscInt m = b->AIJ->m,*idx,*ii; 1280 PetscInt n,i,jrow,j; 1281 1282 PetscFunctionBegin; 1283 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1284 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1285 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1286 idx = a->j; 1287 v = a->a; 1288 ii = a->i; 1289 1290 for (i=0; i<m; i++) { 1291 jrow = ii[i]; 1292 n = ii[i+1] - jrow; 1293 sum1 = 0.0; 1294 sum2 = 0.0; 1295 sum3 = 0.0; 1296 sum4 = 0.0; 1297 sum5 = 0.0; 1298 sum6 = 0.0; 1299 sum7 = 0.0; 1300 sum8 = 0.0; 1301 for (j=0; j<n; j++) { 1302 sum1 += v[jrow]*x[8*idx[jrow]]; 1303 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1304 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1305 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1306 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1307 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1308 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1309 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1310 jrow++; 1311 } 1312 y[8*i] += sum1; 1313 y[8*i+1] += sum2; 1314 y[8*i+2] += sum3; 1315 y[8*i+3] += sum4; 1316 y[8*i+4] += sum5; 1317 y[8*i+5] += sum6; 1318 y[8*i+6] += sum7; 1319 y[8*i+7] += sum8; 1320 } 1321 1322 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1324 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1330 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1331 { 1332 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1333 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1334 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1335 PetscErrorCode ierr; 1336 PetscInt m = b->AIJ->m,n,i,*idx; 1337 1338 PetscFunctionBegin; 1339 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1340 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1341 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1342 for (i=0; i<m; i++) { 1343 idx = a->j + a->i[i] ; 1344 v = a->a + a->i[i] ; 1345 n = a->i[i+1] - a->i[i]; 1346 alpha1 = x[8*i]; 1347 alpha2 = x[8*i+1]; 1348 alpha3 = x[8*i+2]; 1349 alpha4 = x[8*i+3]; 1350 alpha5 = x[8*i+4]; 1351 alpha6 = x[8*i+5]; 1352 alpha7 = x[8*i+6]; 1353 alpha8 = x[8*i+7]; 1354 while (n-->0) { 1355 y[8*(*idx)] += alpha1*(*v); 1356 y[8*(*idx)+1] += alpha2*(*v); 1357 y[8*(*idx)+2] += alpha3*(*v); 1358 y[8*(*idx)+3] += alpha4*(*v); 1359 y[8*(*idx)+4] += alpha5*(*v); 1360 y[8*(*idx)+5] += alpha6*(*v); 1361 y[8*(*idx)+6] += alpha7*(*v); 1362 y[8*(*idx)+7] += alpha8*(*v); 1363 idx++; v++; 1364 } 1365 } 1366 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1367 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1368 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 /* ------------------------------------------------------------------------------*/ 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1375 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1376 { 1377 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1378 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1379 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1380 PetscErrorCode ierr; 1381 PetscInt m = b->AIJ->m,*idx,*ii; 1382 PetscInt n,i,jrow,j; 1383 1384 PetscFunctionBegin; 1385 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1386 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1387 idx = a->j; 1388 v = a->a; 1389 ii = a->i; 1390 1391 for (i=0; i<m; i++) { 1392 jrow = ii[i]; 1393 n = ii[i+1] - jrow; 1394 sum1 = 0.0; 1395 sum2 = 0.0; 1396 sum3 = 0.0; 1397 sum4 = 0.0; 1398 sum5 = 0.0; 1399 sum6 = 0.0; 1400 sum7 = 0.0; 1401 sum8 = 0.0; 1402 sum9 = 0.0; 1403 for (j=0; j<n; j++) { 1404 sum1 += v[jrow]*x[9*idx[jrow]]; 1405 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1406 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1407 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1408 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1409 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1410 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1411 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1412 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1413 jrow++; 1414 } 1415 y[9*i] = sum1; 1416 y[9*i+1] = sum2; 1417 y[9*i+2] = sum3; 1418 y[9*i+3] = sum4; 1419 y[9*i+4] = sum5; 1420 y[9*i+5] = sum6; 1421 y[9*i+6] = sum7; 1422 y[9*i+7] = sum8; 1423 y[9*i+8] = sum9; 1424 } 1425 1426 ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr); 1427 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1428 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 /* ------------------------------------------------------------------------------*/ 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1436 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1437 { 1438 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1439 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1440 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1441 PetscErrorCode ierr; 1442 PetscInt m = b->AIJ->m,n,i,*idx; 1443 1444 PetscFunctionBegin; 1445 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1446 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1447 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1448 1449 for (i=0; i<m; i++) { 1450 idx = a->j + a->i[i] ; 1451 v = a->a + a->i[i] ; 1452 n = a->i[i+1] - a->i[i]; 1453 alpha1 = x[9*i]; 1454 alpha2 = x[9*i+1]; 1455 alpha3 = x[9*i+2]; 1456 alpha4 = x[9*i+3]; 1457 alpha5 = x[9*i+4]; 1458 alpha6 = x[9*i+5]; 1459 alpha7 = x[9*i+6]; 1460 alpha8 = x[9*i+7]; 1461 alpha9 = x[9*i+8]; 1462 while (n-->0) { 1463 y[9*(*idx)] += alpha1*(*v); 1464 y[9*(*idx)+1] += alpha2*(*v); 1465 y[9*(*idx)+2] += alpha3*(*v); 1466 y[9*(*idx)+3] += alpha4*(*v); 1467 y[9*(*idx)+4] += alpha5*(*v); 1468 y[9*(*idx)+5] += alpha6*(*v); 1469 y[9*(*idx)+6] += alpha7*(*v); 1470 y[9*(*idx)+7] += alpha8*(*v); 1471 y[9*(*idx)+8] += alpha9*(*v); 1472 idx++; v++; 1473 } 1474 } 1475 ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->n);CHKERRQ(ierr); 1476 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1477 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1478 PetscFunctionReturn(0); 1479 } 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1483 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1484 { 1485 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1486 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1487 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1488 PetscErrorCode ierr; 1489 PetscInt m = b->AIJ->m,*idx,*ii; 1490 PetscInt n,i,jrow,j; 1491 1492 PetscFunctionBegin; 1493 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1494 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1495 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1496 idx = a->j; 1497 v = a->a; 1498 ii = a->i; 1499 1500 for (i=0; i<m; i++) { 1501 jrow = ii[i]; 1502 n = ii[i+1] - jrow; 1503 sum1 = 0.0; 1504 sum2 = 0.0; 1505 sum3 = 0.0; 1506 sum4 = 0.0; 1507 sum5 = 0.0; 1508 sum6 = 0.0; 1509 sum7 = 0.0; 1510 sum8 = 0.0; 1511 sum9 = 0.0; 1512 for (j=0; j<n; j++) { 1513 sum1 += v[jrow]*x[9*idx[jrow]]; 1514 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1515 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1516 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1517 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1518 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1519 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1520 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1521 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1522 jrow++; 1523 } 1524 y[9*i] += sum1; 1525 y[9*i+1] += sum2; 1526 y[9*i+2] += sum3; 1527 y[9*i+3] += sum4; 1528 y[9*i+4] += sum5; 1529 y[9*i+5] += sum6; 1530 y[9*i+6] += sum7; 1531 y[9*i+7] += sum8; 1532 y[9*i+8] += sum9; 1533 } 1534 1535 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1536 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1537 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1543 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1544 { 1545 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1546 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1547 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1548 PetscErrorCode ierr; 1549 PetscInt m = b->AIJ->m,n,i,*idx; 1550 1551 PetscFunctionBegin; 1552 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1553 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1554 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1555 for (i=0; i<m; i++) { 1556 idx = a->j + a->i[i] ; 1557 v = a->a + a->i[i] ; 1558 n = a->i[i+1] - a->i[i]; 1559 alpha1 = x[9*i]; 1560 alpha2 = x[9*i+1]; 1561 alpha3 = x[9*i+2]; 1562 alpha4 = x[9*i+3]; 1563 alpha5 = x[9*i+4]; 1564 alpha6 = x[9*i+5]; 1565 alpha7 = x[9*i+6]; 1566 alpha8 = x[9*i+7]; 1567 alpha9 = x[9*i+8]; 1568 while (n-->0) { 1569 y[9*(*idx)] += alpha1*(*v); 1570 y[9*(*idx)+1] += alpha2*(*v); 1571 y[9*(*idx)+2] += alpha3*(*v); 1572 y[9*(*idx)+3] += alpha4*(*v); 1573 y[9*(*idx)+4] += alpha5*(*v); 1574 y[9*(*idx)+5] += alpha6*(*v); 1575 y[9*(*idx)+6] += alpha7*(*v); 1576 y[9*(*idx)+7] += alpha8*(*v); 1577 y[9*(*idx)+8] += alpha9*(*v); 1578 idx++; v++; 1579 } 1580 } 1581 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1582 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1583 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1584 PetscFunctionReturn(0); 1585 } 1586 /*--------------------------------------------------------------------------------------------*/ 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1589 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1590 { 1591 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1592 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1593 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1594 PetscErrorCode ierr; 1595 PetscInt m = b->AIJ->m,*idx,*ii; 1596 PetscInt n,i,jrow,j; 1597 1598 PetscFunctionBegin; 1599 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1600 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1601 idx = a->j; 1602 v = a->a; 1603 ii = a->i; 1604 1605 for (i=0; i<m; i++) { 1606 jrow = ii[i]; 1607 n = ii[i+1] - jrow; 1608 sum1 = 0.0; 1609 sum2 = 0.0; 1610 sum3 = 0.0; 1611 sum4 = 0.0; 1612 sum5 = 0.0; 1613 sum6 = 0.0; 1614 sum7 = 0.0; 1615 sum8 = 0.0; 1616 sum9 = 0.0; 1617 sum10 = 0.0; 1618 for (j=0; j<n; j++) { 1619 sum1 += v[jrow]*x[10*idx[jrow]]; 1620 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1621 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1622 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1623 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1624 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1625 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1626 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1627 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1628 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1629 jrow++; 1630 } 1631 y[10*i] = sum1; 1632 y[10*i+1] = sum2; 1633 y[10*i+2] = sum3; 1634 y[10*i+3] = sum4; 1635 y[10*i+4] = sum5; 1636 y[10*i+5] = sum6; 1637 y[10*i+6] = sum7; 1638 y[10*i+7] = sum8; 1639 y[10*i+8] = sum9; 1640 y[10*i+9] = sum10; 1641 } 1642 1643 ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1644 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1645 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1646 PetscFunctionReturn(0); 1647 } 1648 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1651 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1652 { 1653 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1654 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1655 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1656 PetscErrorCode ierr; 1657 PetscInt m = b->AIJ->m,*idx,*ii; 1658 PetscInt n,i,jrow,j; 1659 1660 PetscFunctionBegin; 1661 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1662 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1663 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1664 idx = a->j; 1665 v = a->a; 1666 ii = a->i; 1667 1668 for (i=0; i<m; i++) { 1669 jrow = ii[i]; 1670 n = ii[i+1] - jrow; 1671 sum1 = 0.0; 1672 sum2 = 0.0; 1673 sum3 = 0.0; 1674 sum4 = 0.0; 1675 sum5 = 0.0; 1676 sum6 = 0.0; 1677 sum7 = 0.0; 1678 sum8 = 0.0; 1679 sum9 = 0.0; 1680 sum10 = 0.0; 1681 for (j=0; j<n; j++) { 1682 sum1 += v[jrow]*x[10*idx[jrow]]; 1683 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1684 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1685 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1686 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1687 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1688 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1689 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1690 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1691 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1692 jrow++; 1693 } 1694 y[10*i] += sum1; 1695 y[10*i+1] += sum2; 1696 y[10*i+2] += sum3; 1697 y[10*i+3] += sum4; 1698 y[10*i+4] += sum5; 1699 y[10*i+5] += sum6; 1700 y[10*i+6] += sum7; 1701 y[10*i+7] += sum8; 1702 y[10*i+8] += sum9; 1703 y[10*i+9] += sum10; 1704 } 1705 1706 ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1707 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1708 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1709 PetscFunctionReturn(0); 1710 } 1711 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1714 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1715 { 1716 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1717 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1718 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1719 PetscErrorCode ierr; 1720 PetscInt m = b->AIJ->m,n,i,*idx; 1721 1722 PetscFunctionBegin; 1723 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1724 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1725 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1726 1727 for (i=0; i<m; i++) { 1728 idx = a->j + a->i[i] ; 1729 v = a->a + a->i[i] ; 1730 n = a->i[i+1] - a->i[i]; 1731 alpha1 = x[9*i]; 1732 alpha2 = x[9*i+1]; 1733 alpha3 = x[9*i+2]; 1734 alpha4 = x[9*i+3]; 1735 alpha5 = x[9*i+4]; 1736 alpha6 = x[9*i+5]; 1737 alpha7 = x[9*i+6]; 1738 alpha8 = x[9*i+7]; 1739 alpha9 = x[9*i+8]; 1740 alpha10 = x[10*i+9]; 1741 while (n-->0) { 1742 y[9*(*idx)] += alpha1*(*v); 1743 y[9*(*idx)+1] += alpha2*(*v); 1744 y[9*(*idx)+2] += alpha3*(*v); 1745 y[9*(*idx)+3] += alpha4*(*v); 1746 y[9*(*idx)+4] += alpha5*(*v); 1747 y[9*(*idx)+5] += alpha6*(*v); 1748 y[9*(*idx)+6] += alpha7*(*v); 1749 y[9*(*idx)+7] += alpha8*(*v); 1750 y[9*(*idx)+8] += alpha9*(*v); 1751 y[10*(*idx)+9] += alpha10*(*v); 1752 idx++; v++; 1753 } 1754 } 1755 ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->n);CHKERRQ(ierr); 1756 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1757 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1763 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1764 { 1765 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1766 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1767 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1768 PetscErrorCode ierr; 1769 PetscInt m = b->AIJ->m,n,i,*idx; 1770 1771 PetscFunctionBegin; 1772 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1773 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1774 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1775 for (i=0; i<m; i++) { 1776 idx = a->j + a->i[i] ; 1777 v = a->a + a->i[i] ; 1778 n = a->i[i+1] - a->i[i]; 1779 alpha1 = x[10*i]; 1780 alpha2 = x[10*i+1]; 1781 alpha3 = x[10*i+2]; 1782 alpha4 = x[10*i+3]; 1783 alpha5 = x[10*i+4]; 1784 alpha6 = x[10*i+5]; 1785 alpha7 = x[10*i+6]; 1786 alpha8 = x[10*i+7]; 1787 alpha9 = x[10*i+8]; 1788 alpha10 = x[10*i+9]; 1789 while (n-->0) { 1790 y[10*(*idx)] += alpha1*(*v); 1791 y[10*(*idx)+1] += alpha2*(*v); 1792 y[10*(*idx)+2] += alpha3*(*v); 1793 y[10*(*idx)+3] += alpha4*(*v); 1794 y[10*(*idx)+4] += alpha5*(*v); 1795 y[10*(*idx)+5] += alpha6*(*v); 1796 y[10*(*idx)+6] += alpha7*(*v); 1797 y[10*(*idx)+7] += alpha8*(*v); 1798 y[10*(*idx)+8] += alpha9*(*v); 1799 y[10*(*idx)+9] += alpha10*(*v); 1800 idx++; v++; 1801 } 1802 } 1803 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1804 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1805 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 1810 /*--------------------------------------------------------------------------------------------*/ 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1813 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1814 { 1815 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1816 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1817 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1818 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1819 PetscErrorCode ierr; 1820 PetscInt m = b->AIJ->m,*idx,*ii; 1821 PetscInt n,i,jrow,j; 1822 1823 PetscFunctionBegin; 1824 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1825 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1826 idx = a->j; 1827 v = a->a; 1828 ii = a->i; 1829 1830 for (i=0; i<m; i++) { 1831 jrow = ii[i]; 1832 n = ii[i+1] - jrow; 1833 sum1 = 0.0; 1834 sum2 = 0.0; 1835 sum3 = 0.0; 1836 sum4 = 0.0; 1837 sum5 = 0.0; 1838 sum6 = 0.0; 1839 sum7 = 0.0; 1840 sum8 = 0.0; 1841 sum9 = 0.0; 1842 sum10 = 0.0; 1843 sum11 = 0.0; 1844 sum12 = 0.0; 1845 sum13 = 0.0; 1846 sum14 = 0.0; 1847 sum15 = 0.0; 1848 sum16 = 0.0; 1849 for (j=0; j<n; j++) { 1850 sum1 += v[jrow]*x[16*idx[jrow]]; 1851 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1852 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1853 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1854 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1855 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1856 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1857 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1858 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1859 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1860 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1861 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1862 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1863 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1864 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1865 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1866 jrow++; 1867 } 1868 y[16*i] = sum1; 1869 y[16*i+1] = sum2; 1870 y[16*i+2] = sum3; 1871 y[16*i+3] = sum4; 1872 y[16*i+4] = sum5; 1873 y[16*i+5] = sum6; 1874 y[16*i+6] = sum7; 1875 y[16*i+7] = sum8; 1876 y[16*i+8] = sum9; 1877 y[16*i+9] = sum10; 1878 y[16*i+10] = sum11; 1879 y[16*i+11] = sum12; 1880 y[16*i+12] = sum13; 1881 y[16*i+13] = sum14; 1882 y[16*i+14] = sum15; 1883 y[16*i+15] = sum16; 1884 } 1885 1886 ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr); 1887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1888 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1894 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1895 { 1896 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1897 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1898 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1899 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1900 PetscErrorCode ierr; 1901 PetscInt m = b->AIJ->m,n,i,*idx; 1902 1903 PetscFunctionBegin; 1904 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1905 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1906 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1907 1908 for (i=0; i<m; i++) { 1909 idx = a->j + a->i[i] ; 1910 v = a->a + a->i[i] ; 1911 n = a->i[i+1] - a->i[i]; 1912 alpha1 = x[16*i]; 1913 alpha2 = x[16*i+1]; 1914 alpha3 = x[16*i+2]; 1915 alpha4 = x[16*i+3]; 1916 alpha5 = x[16*i+4]; 1917 alpha6 = x[16*i+5]; 1918 alpha7 = x[16*i+6]; 1919 alpha8 = x[16*i+7]; 1920 alpha9 = x[16*i+8]; 1921 alpha10 = x[16*i+9]; 1922 alpha11 = x[16*i+10]; 1923 alpha12 = x[16*i+11]; 1924 alpha13 = x[16*i+12]; 1925 alpha14 = x[16*i+13]; 1926 alpha15 = x[16*i+14]; 1927 alpha16 = x[16*i+15]; 1928 while (n-->0) { 1929 y[16*(*idx)] += alpha1*(*v); 1930 y[16*(*idx)+1] += alpha2*(*v); 1931 y[16*(*idx)+2] += alpha3*(*v); 1932 y[16*(*idx)+3] += alpha4*(*v); 1933 y[16*(*idx)+4] += alpha5*(*v); 1934 y[16*(*idx)+5] += alpha6*(*v); 1935 y[16*(*idx)+6] += alpha7*(*v); 1936 y[16*(*idx)+7] += alpha8*(*v); 1937 y[16*(*idx)+8] += alpha9*(*v); 1938 y[16*(*idx)+9] += alpha10*(*v); 1939 y[16*(*idx)+10] += alpha11*(*v); 1940 y[16*(*idx)+11] += alpha12*(*v); 1941 y[16*(*idx)+12] += alpha13*(*v); 1942 y[16*(*idx)+13] += alpha14*(*v); 1943 y[16*(*idx)+14] += alpha15*(*v); 1944 y[16*(*idx)+15] += alpha16*(*v); 1945 idx++; v++; 1946 } 1947 } 1948 ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->n);CHKERRQ(ierr); 1949 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1950 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1956 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1957 { 1958 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1959 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1960 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1961 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1962 PetscErrorCode ierr; 1963 PetscInt m = b->AIJ->m,*idx,*ii; 1964 PetscInt n,i,jrow,j; 1965 1966 PetscFunctionBegin; 1967 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1968 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1969 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1970 idx = a->j; 1971 v = a->a; 1972 ii = a->i; 1973 1974 for (i=0; i<m; i++) { 1975 jrow = ii[i]; 1976 n = ii[i+1] - jrow; 1977 sum1 = 0.0; 1978 sum2 = 0.0; 1979 sum3 = 0.0; 1980 sum4 = 0.0; 1981 sum5 = 0.0; 1982 sum6 = 0.0; 1983 sum7 = 0.0; 1984 sum8 = 0.0; 1985 sum9 = 0.0; 1986 sum10 = 0.0; 1987 sum11 = 0.0; 1988 sum12 = 0.0; 1989 sum13 = 0.0; 1990 sum14 = 0.0; 1991 sum15 = 0.0; 1992 sum16 = 0.0; 1993 for (j=0; j<n; j++) { 1994 sum1 += v[jrow]*x[16*idx[jrow]]; 1995 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1996 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1997 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1998 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1999 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2000 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2001 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2002 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2003 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2004 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2005 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2006 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2007 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2008 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2009 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2010 jrow++; 2011 } 2012 y[16*i] += sum1; 2013 y[16*i+1] += sum2; 2014 y[16*i+2] += sum3; 2015 y[16*i+3] += sum4; 2016 y[16*i+4] += sum5; 2017 y[16*i+5] += sum6; 2018 y[16*i+6] += sum7; 2019 y[16*i+7] += sum8; 2020 y[16*i+8] += sum9; 2021 y[16*i+9] += sum10; 2022 y[16*i+10] += sum11; 2023 y[16*i+11] += sum12; 2024 y[16*i+12] += sum13; 2025 y[16*i+13] += sum14; 2026 y[16*i+14] += sum15; 2027 y[16*i+15] += sum16; 2028 } 2029 2030 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2031 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2032 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 #undef __FUNCT__ 2037 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2038 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2039 { 2040 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2041 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2042 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2043 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2044 PetscErrorCode ierr; 2045 PetscInt m = b->AIJ->m,n,i,*idx; 2046 2047 PetscFunctionBegin; 2048 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2049 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2050 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2051 for (i=0; i<m; i++) { 2052 idx = a->j + a->i[i] ; 2053 v = a->a + a->i[i] ; 2054 n = a->i[i+1] - a->i[i]; 2055 alpha1 = x[16*i]; 2056 alpha2 = x[16*i+1]; 2057 alpha3 = x[16*i+2]; 2058 alpha4 = x[16*i+3]; 2059 alpha5 = x[16*i+4]; 2060 alpha6 = x[16*i+5]; 2061 alpha7 = x[16*i+6]; 2062 alpha8 = x[16*i+7]; 2063 alpha9 = x[16*i+8]; 2064 alpha10 = x[16*i+9]; 2065 alpha11 = x[16*i+10]; 2066 alpha12 = x[16*i+11]; 2067 alpha13 = x[16*i+12]; 2068 alpha14 = x[16*i+13]; 2069 alpha15 = x[16*i+14]; 2070 alpha16 = x[16*i+15]; 2071 while (n-->0) { 2072 y[16*(*idx)] += alpha1*(*v); 2073 y[16*(*idx)+1] += alpha2*(*v); 2074 y[16*(*idx)+2] += alpha3*(*v); 2075 y[16*(*idx)+3] += alpha4*(*v); 2076 y[16*(*idx)+4] += alpha5*(*v); 2077 y[16*(*idx)+5] += alpha6*(*v); 2078 y[16*(*idx)+6] += alpha7*(*v); 2079 y[16*(*idx)+7] += alpha8*(*v); 2080 y[16*(*idx)+8] += alpha9*(*v); 2081 y[16*(*idx)+9] += alpha10*(*v); 2082 y[16*(*idx)+10] += alpha11*(*v); 2083 y[16*(*idx)+11] += alpha12*(*v); 2084 y[16*(*idx)+12] += alpha13*(*v); 2085 y[16*(*idx)+13] += alpha14*(*v); 2086 y[16*(*idx)+14] += alpha15*(*v); 2087 y[16*(*idx)+15] += alpha16*(*v); 2088 idx++; v++; 2089 } 2090 } 2091 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2092 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2093 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2094 PetscFunctionReturn(0); 2095 } 2096 2097 /*===================================================================================*/ 2098 #undef __FUNCT__ 2099 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2100 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2101 { 2102 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2103 PetscErrorCode ierr; 2104 2105 PetscFunctionBegin; 2106 /* start the scatter */ 2107 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2108 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2109 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2110 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2116 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2117 { 2118 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2119 PetscErrorCode ierr; 2120 2121 PetscFunctionBegin; 2122 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2123 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2124 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2125 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2126 PetscFunctionReturn(0); 2127 } 2128 2129 #undef __FUNCT__ 2130 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2131 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2132 { 2133 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2134 PetscErrorCode ierr; 2135 2136 PetscFunctionBegin; 2137 /* start the scatter */ 2138 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2139 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2140 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2141 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2147 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2148 { 2149 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2150 PetscErrorCode ierr; 2151 2152 PetscFunctionBegin; 2153 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2154 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2155 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2156 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2162 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2163 { 2164 /* This routine requires testing -- but it's getting better. */ 2165 PetscErrorCode ierr; 2166 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2167 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2168 Mat P=pp->AIJ; 2169 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2170 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 2171 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2172 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof,cn; 2173 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 2174 MatScalar *ca; 2175 2176 PetscFunctionBegin; 2177 /* Start timer */ 2178 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2179 2180 /* Get ij structure of P^T */ 2181 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2182 2183 cn = pn*ppdof; 2184 /* Allocate ci array, arrays for fill computation and */ 2185 /* free space for accumulating nonzero column info */ 2186 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2187 ci[0] = 0; 2188 2189 /* Work arrays for rows of P^T*A */ 2190 ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 2191 ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 2192 ptasparserow = ptadenserow + an; 2193 denserow = ptasparserow + an; 2194 sparserow = denserow + cn; 2195 2196 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2197 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2198 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2199 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 2200 current_space = free_space; 2201 2202 /* Determine symbolic info for each row of C: */ 2203 for (i=0;i<pn;i++) { 2204 ptnzi = pti[i+1] - pti[i]; 2205 ptJ = ptj + pti[i]; 2206 for (dof=0;dof<ppdof;dof++) { 2207 ptanzi = 0; 2208 /* Determine symbolic row of PtA: */ 2209 for (j=0;j<ptnzi;j++) { 2210 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2211 arow = ptJ[j]*ppdof + dof; 2212 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2213 anzj = ai[arow+1] - ai[arow]; 2214 ajj = aj + ai[arow]; 2215 for (k=0;k<anzj;k++) { 2216 if (!ptadenserow[ajj[k]]) { 2217 ptadenserow[ajj[k]] = -1; 2218 ptasparserow[ptanzi++] = ajj[k]; 2219 } 2220 } 2221 } 2222 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2223 ptaj = ptasparserow; 2224 cnzi = 0; 2225 for (j=0;j<ptanzi;j++) { 2226 /* Get offset within block of P */ 2227 pshift = *ptaj%ppdof; 2228 /* Get block row of P */ 2229 prow = (*ptaj++)/ppdof; /* integer division */ 2230 /* P has same number of nonzeros per row as the compressed form */ 2231 pnzj = pi[prow+1] - pi[prow]; 2232 pjj = pj + pi[prow]; 2233 for (k=0;k<pnzj;k++) { 2234 /* Locations in C are shifted by the offset within the block */ 2235 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2236 if (!denserow[pjj[k]*ppdof+pshift]) { 2237 denserow[pjj[k]*ppdof+pshift] = -1; 2238 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2239 } 2240 } 2241 } 2242 2243 /* sort sparserow */ 2244 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2245 2246 /* If free space is not available, make more free space */ 2247 /* Double the amount of total space in the list */ 2248 if (current_space->local_remaining<cnzi) { 2249 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 2250 } 2251 2252 /* Copy data into free space, and zero out denserows */ 2253 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2254 current_space->array += cnzi; 2255 current_space->local_used += cnzi; 2256 current_space->local_remaining -= cnzi; 2257 2258 for (j=0;j<ptanzi;j++) { 2259 ptadenserow[ptasparserow[j]] = 0; 2260 } 2261 for (j=0;j<cnzi;j++) { 2262 denserow[sparserow[j]] = 0; 2263 } 2264 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2265 /* For now, we will recompute what is needed. */ 2266 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2267 } 2268 } 2269 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2270 /* Allocate space for cj, initialize cj, and */ 2271 /* destroy list of free space and other temporary array(s) */ 2272 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2273 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2274 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 2275 2276 /* Allocate space for ca */ 2277 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2278 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2279 2280 /* put together the new matrix */ 2281 ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2282 2283 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2284 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2285 c = (Mat_SeqAIJ *)((*C)->data); 2286 c->freedata = PETSC_TRUE; 2287 c->nonew = 0; 2288 2289 /* Clean up. */ 2290 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2291 2292 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2293 PetscFunctionReturn(0); 2294 } 2295 2296 #undef __FUNCT__ 2297 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 2298 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2299 { 2300 /* This routine requires testing -- first draft only */ 2301 PetscErrorCode ierr; 2302 PetscInt flops=0; 2303 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2304 Mat P=pp->AIJ; 2305 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 2306 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 2307 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2308 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 2309 PetscInt *ci=c->i,*cj=c->j,*cjj; 2310 PetscInt am=A->M,cn=C->N,cm=C->M,ppdof=pp->dof; 2311 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 2312 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 2313 2314 PetscFunctionBegin; 2315 /* Allocate temporary array for storage of one row of A*P */ 2316 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 2317 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 2318 2319 apj = (PetscInt *)(apa + cn); 2320 apjdense = apj + cn; 2321 2322 /* Clear old values in C */ 2323 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2324 2325 for (i=0;i<am;i++) { 2326 /* Form sparse row of A*P */ 2327 anzi = ai[i+1] - ai[i]; 2328 apnzj = 0; 2329 for (j=0;j<anzi;j++) { 2330 /* Get offset within block of P */ 2331 pshift = *aj%ppdof; 2332 /* Get block row of P */ 2333 prow = *aj++/ppdof; /* integer division */ 2334 pnzj = pi[prow+1] - pi[prow]; 2335 pjj = pj + pi[prow]; 2336 paj = pa + pi[prow]; 2337 for (k=0;k<pnzj;k++) { 2338 poffset = pjj[k]*ppdof+pshift; 2339 if (!apjdense[poffset]) { 2340 apjdense[poffset] = -1; 2341 apj[apnzj++] = poffset; 2342 } 2343 apa[poffset] += (*aa)*paj[k]; 2344 } 2345 flops += 2*pnzj; 2346 aa++; 2347 } 2348 2349 /* Sort the j index array for quick sparse axpy. */ 2350 /* Note: a array does not need sorting as it is in dense storage locations. */ 2351 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 2352 2353 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2354 prow = i/ppdof; /* integer division */ 2355 pshift = i%ppdof; 2356 poffset = pi[prow]; 2357 pnzi = pi[prow+1] - poffset; 2358 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2359 pJ = pj+poffset; 2360 pA = pa+poffset; 2361 for (j=0;j<pnzi;j++) { 2362 crow = (*pJ)*ppdof+pshift; 2363 cjj = cj + ci[crow]; 2364 caj = ca + ci[crow]; 2365 pJ++; 2366 /* Perform sparse axpy operation. Note cjj includes apj. */ 2367 for (k=0,nextap=0;nextap<apnzj;k++) { 2368 if (cjj[k]==apj[nextap]) { 2369 caj[k] += (*pA)*apa[apj[nextap++]]; 2370 } 2371 } 2372 flops += 2*apnzj; 2373 pA++; 2374 } 2375 2376 /* Zero the current row info for A*P */ 2377 for (j=0;j<apnzj;j++) { 2378 apa[apj[j]] = 0.; 2379 apjdense[apj[j]] = 0; 2380 } 2381 } 2382 2383 /* Assemble the final matrix and clean up */ 2384 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2385 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2386 ierr = PetscFree(apa);CHKERRQ(ierr); 2387 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 2388 2389 PetscFunctionReturn(0); 2390 } 2391 2392 EXTERN_C_BEGIN 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2395 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2396 { 2397 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2398 Mat a = b->AIJ,B; 2399 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 2400 PetscErrorCode ierr; 2401 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2402 PetscInt *cols; 2403 PetscScalar *vals; 2404 2405 PetscFunctionBegin; 2406 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 2407 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 2408 for (i=0; i<m; i++) { 2409 nmax = PetscMax(nmax,aij->ilen[i]); 2410 for (j=0; j<dof; j++) { 2411 ilen[dof*i+j] = aij->ilen[i]; 2412 } 2413 } 2414 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2415 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2416 ierr = PetscFree(ilen);CHKERRQ(ierr); 2417 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 2418 ii = 0; 2419 for (i=0; i<m; i++) { 2420 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2421 for (j=0; j<dof; j++) { 2422 for (k=0; k<ncols; k++) { 2423 icols[k] = dof*cols[k]+j; 2424 } 2425 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2426 ii++; 2427 } 2428 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2429 } 2430 ierr = PetscFree(icols);CHKERRQ(ierr); 2431 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2432 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2433 2434 if (reuse == MAT_REUSE_MATRIX) { 2435 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2436 } else { 2437 *newmat = B; 2438 } 2439 PetscFunctionReturn(0); 2440 } 2441 EXTERN_C_END 2442 2443 #include "src/mat/impls/aij/mpi/mpiaij.h" 2444 2445 EXTERN_C_BEGIN 2446 #undef __FUNCT__ 2447 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2448 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2449 { 2450 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2451 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 2452 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 2453 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 2454 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 2455 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2456 PetscInt dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols; 2457 PetscInt rstart,cstart,*garray,ii,k; 2458 PetscErrorCode ierr; 2459 PetscScalar *vals,*ovals; 2460 2461 PetscFunctionBegin; 2462 ierr = PetscMalloc2(A->m,PetscInt,&dnz,A->m,PetscInt,&onz);CHKERRQ(ierr); 2463 for (i=0; i<A->m/dof; i++) { 2464 nmax = PetscMax(nmax,AIJ->ilen[i]); 2465 onmax = PetscMax(onmax,OAIJ->ilen[i]); 2466 for (j=0; j<dof; j++) { 2467 dnz[dof*i+j] = AIJ->ilen[i]; 2468 onz[dof*i+j] = OAIJ->ilen[i]; 2469 } 2470 } 2471 ierr = MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2472 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2473 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 2474 2475 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2476 rstart = dof*mpiaij->rstart; 2477 cstart = dof*mpiaij->cstart; 2478 garray = mpiaij->garray; 2479 2480 ii = rstart; 2481 for (i=0; i<A->m/dof; i++) { 2482 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2483 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2484 for (j=0; j<dof; j++) { 2485 for (k=0; k<ncols; k++) { 2486 icols[k] = cstart + dof*cols[k]+j; 2487 } 2488 for (k=0; k<oncols; k++) { 2489 oicols[k] = dof*garray[ocols[k]]+j; 2490 } 2491 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2492 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 2493 ii++; 2494 } 2495 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2496 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2497 } 2498 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 2499 2500 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2501 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2502 2503 if (reuse == MAT_REUSE_MATRIX) { 2504 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2505 } else { 2506 *newmat = B; 2507 } 2508 PetscFunctionReturn(0); 2509 } 2510 EXTERN_C_END 2511 2512 2513 /* ---------------------------------------------------------------------------------- */ 2514 /*MC 2515 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 2516 operations for multicomponent problems. It interpolates each component the same 2517 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 2518 and MATMPIAIJ for distributed matrices. 2519 2520 Operations provided: 2521 + MatMult 2522 . MatMultTranspose 2523 . MatMultAdd 2524 . MatMultTransposeAdd 2525 - MatView 2526 2527 Level: advanced 2528 2529 M*/ 2530 #undef __FUNCT__ 2531 #define __FUNCT__ "MatCreateMAIJ" 2532 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 2533 { 2534 PetscErrorCode ierr; 2535 PetscMPIInt size; 2536 PetscInt n; 2537 Mat_MPIMAIJ *b; 2538 Mat B; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2542 2543 if (dof == 1) { 2544 *maij = A; 2545 } else { 2546 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 2547 ierr = MatSetSizes(B,dof*A->m,dof*A->n,dof*A->M,dof*A->N);CHKERRQ(ierr); 2548 B->assembled = PETSC_TRUE; 2549 2550 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2551 if (size == 1) { 2552 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 2553 B->ops->destroy = MatDestroy_SeqMAIJ; 2554 B->ops->view = MatView_SeqMAIJ; 2555 b = (Mat_MPIMAIJ*)B->data; 2556 b->dof = dof; 2557 b->AIJ = A; 2558 if (dof == 2) { 2559 B->ops->mult = MatMult_SeqMAIJ_2; 2560 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2561 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2562 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2563 } else if (dof == 3) { 2564 B->ops->mult = MatMult_SeqMAIJ_3; 2565 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2566 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2567 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2568 } else if (dof == 4) { 2569 B->ops->mult = MatMult_SeqMAIJ_4; 2570 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2571 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2572 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2573 } else if (dof == 5) { 2574 B->ops->mult = MatMult_SeqMAIJ_5; 2575 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2576 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2577 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 2578 } else if (dof == 6) { 2579 B->ops->mult = MatMult_SeqMAIJ_6; 2580 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 2581 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 2582 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2583 } else if (dof == 7) { 2584 B->ops->mult = MatMult_SeqMAIJ_7; 2585 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2586 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2587 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 2588 } else if (dof == 8) { 2589 B->ops->mult = MatMult_SeqMAIJ_8; 2590 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 2591 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 2592 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 2593 } else if (dof == 9) { 2594 B->ops->mult = MatMult_SeqMAIJ_9; 2595 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 2596 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 2597 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2598 } else if (dof == 10) { 2599 B->ops->mult = MatMult_SeqMAIJ_10; 2600 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2601 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2602 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 2603 } else if (dof == 16) { 2604 B->ops->mult = MatMult_SeqMAIJ_16; 2605 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 2606 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 2607 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 2608 } else { 2609 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 2610 } 2611 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 2612 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2614 } else { 2615 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2616 IS from,to; 2617 Vec gvec; 2618 PetscInt *garray,i; 2619 2620 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2621 B->ops->destroy = MatDestroy_MPIMAIJ; 2622 B->ops->view = MatView_MPIMAIJ; 2623 b = (Mat_MPIMAIJ*)B->data; 2624 b->dof = dof; 2625 b->A = A; 2626 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 2627 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 2628 2629 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2630 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 2631 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2632 2633 /* create two temporary Index sets for build scatter gather */ 2634 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2635 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2636 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 2637 ierr = PetscFree(garray);CHKERRQ(ierr); 2638 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2639 2640 /* create temporary global vector to generate scatter context */ 2641 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 2642 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2643 2644 /* generate the scatter context */ 2645 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2646 2647 ierr = ISDestroy(from);CHKERRQ(ierr); 2648 ierr = ISDestroy(to);CHKERRQ(ierr); 2649 ierr = VecDestroy(gvec);CHKERRQ(ierr); 2650 2651 B->ops->mult = MatMult_MPIMAIJ_dof; 2652 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 2653 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 2654 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 2655 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 2656 } 2657 *maij = B; 2658 ierr = MatView_Private(B);CHKERRQ(ierr); 2659 } 2660 PetscFunctionReturn(0); 2661 } 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674