15e5bbd0aSStefano Zampini #include <petsc/private/pcbddcimpl.h> 25e5bbd0aSStefano Zampini #include <petsc/private/pcbddcprivateimpl.h> 372b8c272SStefano Zampini #include <petscblaslapack.h> 4839e9adbSstefano_zampini #include <../src/mat/impls/dense/seq/dense.h> 5674ae819SStefano Zampini 634a97f8cSStefano Zampini /* prototypes for deluxe functions */ 734a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 834a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 934a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 105a95e1ceSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC); 1134a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12674ae819SStefano Zampini 13d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X) 14d71ae5a4SJacob Faibussowitsch { 15839e9adbSstefano_zampini Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 161683a169SBarry Smith const PetscScalar *b; 171683a169SBarry Smith PetscScalar *x; 18839e9adbSstefano_zampini PetscInt n; 19839e9adbSstefano_zampini PetscBLASInt nrhs, info, m; 20839e9adbSstefano_zampini PetscBool flg; 21839e9adbSstefano_zampini 22839e9adbSstefano_zampini PetscFunctionBegin; 239566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 249566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 2528b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 269566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 2728b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 28839e9adbSstefano_zampini 299566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, NULL, &n)); 309566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(n, &nrhs)); 319566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 329566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 339566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x, b, m * nrhs)); 349566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 35839e9adbSstefano_zampini 360fdf79fbSJacob Faibussowitsch PetscCheck(A->factortype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported"); 37792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 3828b400f6SJacob Faibussowitsch PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve"); 39839e9adbSstefano_zampini 409566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 419566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 43839e9adbSstefano_zampini } 44839e9adbSstefano_zampini 45d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) 46d71ae5a4SJacob Faibussowitsch { 47674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 48674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 49674ae819SStefano Zampini 50674ae819SStefano Zampini PetscFunctionBegin; 51674ae819SStefano Zampini /* Apply partition of unity */ 529566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector)); 539566063dSJacob Faibussowitsch PetscCall(VecSet(global_vector, 0.0)); 549566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 559566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 57674ae819SStefano Zampini } 58674ae819SStefano Zampini 59d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) 60d71ae5a4SJacob Faibussowitsch { 61674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 62674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 63674ae819SStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 64674ae819SStefano Zampini 65674ae819SStefano Zampini PetscFunctionBegin; 669566063dSJacob Faibussowitsch PetscCall(VecSet(pcbddc->work_scaling, 0.0)); 679566063dSJacob Faibussowitsch PetscCall(VecSet(y, 0.0)); 685a95e1ceSStefano Zampini if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 695a95e1ceSStefano Zampini PetscInt i; 702b095fd8SStefano Zampini const PetscScalar *array_x, *array_D; 712b095fd8SStefano Zampini PetscScalar *array; 729566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &array_x)); 739566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(pcis->D, &array_D)); 749566063dSJacob Faibussowitsch PetscCall(VecGetArray(pcbddc->work_scaling, &array)); 75ad540459SPierre Jolivet for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]]; 769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(pcbddc->work_scaling, &array)); 779566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 789566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &array_x)); 7934a97f8cSStefano Zampini } 80562efe2eSBarry Smith /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */ 815a95e1ceSStefano Zampini if (deluxe_ctx->seq_mat) { 8272b8c272SStefano Zampini PetscInt i; 8372b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 8416e386b8SStefano Zampini if (deluxe_ctx->change) { 859566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 869566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 8772b8c272SStefano Zampini if (deluxe_ctx->change_with_qr) { 8872b8c272SStefano Zampini Mat change; 8972b8c272SStefano Zampini 909566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 919566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 9272b8c272SStefano Zampini } else { 939566063dSJacob Faibussowitsch PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 9416e386b8SStefano Zampini } 9572b8c272SStefano Zampini } else { 969566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 979566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 9872b8c272SStefano Zampini } 999566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 10072b8c272SStefano Zampini if (deluxe_ctx->seq_mat_inv_sum[i]) { 10172b8c272SStefano Zampini PetscScalar *x; 10272b8c272SStefano Zampini 1039566063dSJacob Faibussowitsch PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x)); 1049566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x)); 1059566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x)); 1069566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 1079566063dSJacob Faibussowitsch PetscCall(VecResetArray(deluxe_ctx->seq_work1[i])); 108ac632422SStefano Zampini } 10916e386b8SStefano Zampini if (deluxe_ctx->change) { 11016e386b8SStefano Zampini Mat change; 11116e386b8SStefano Zampini 1129566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 1139566063dSJacob Faibussowitsch PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 1149566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 1159566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 11616e386b8SStefano Zampini } else { 1179566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 1189566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 11972b8c272SStefano Zampini } 120674ae819SStefano Zampini } 12116e386b8SStefano Zampini } 122674ae819SStefano Zampini /* put local boundary part in global vector */ 1239566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 1249566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 1253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 126674ae819SStefano Zampini } 127674ae819SStefano Zampini 128d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) 129d71ae5a4SJacob Faibussowitsch { 130674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 131674ae819SStefano Zampini 132674ae819SStefano Zampini PetscFunctionBegin; 133674ae819SStefano Zampini PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 134674ae819SStefano Zampini PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 2); 135674ae819SStefano Zampini PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 3); 13608401ef6SPierre Jolivet PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 137cac4c232SBarry Smith PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector)); 1383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 139674ae819SStefano Zampini } 140674ae819SStefano Zampini 141d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) 142d71ae5a4SJacob Faibussowitsch { 143674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 144674ae819SStefano Zampini 145674ae819SStefano Zampini PetscFunctionBegin; 1469566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 1479566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 148674ae819SStefano Zampini /* Apply partition of unity */ 1499566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector)); 1503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 151674ae819SStefano Zampini } 152674ae819SStefano Zampini 153d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) 154d71ae5a4SJacob Faibussowitsch { 155674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 156674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 157674ae819SStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 158674ae819SStefano Zampini 159674ae819SStefano Zampini PetscFunctionBegin; 160674ae819SStefano Zampini /* get local boundary part of global vector */ 1619566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1629566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1635a95e1ceSStefano Zampini if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 1645a95e1ceSStefano Zampini PetscInt i; 1652b095fd8SStefano Zampini PetscScalar *array_y; 1662b095fd8SStefano Zampini const PetscScalar *array_D; 1679566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &array_y)); 1689566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(pcis->D, &array_D)); 169ad540459SPierre Jolivet for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 1709566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 1719566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &array_y)); 17234a97f8cSStefano Zampini } 173562efe2eSBarry Smith /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */ 1745a95e1ceSStefano Zampini if (deluxe_ctx->seq_mat) { 17572b8c272SStefano Zampini PetscInt i; 17672b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 17716e386b8SStefano Zampini if (deluxe_ctx->change) { 17816e386b8SStefano Zampini Mat change; 17916e386b8SStefano Zampini 1809566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 1819566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 1829566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 1839566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 18416e386b8SStefano Zampini } else { 1859566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 1869566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 18716e386b8SStefano Zampini } 18872b8c272SStefano Zampini if (deluxe_ctx->seq_mat_inv_sum[i]) { 18972b8c272SStefano Zampini PetscScalar *x; 19072b8c272SStefano Zampini 1919566063dSJacob Faibussowitsch PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x)); 1929566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x)); 1939566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x)); 1949566063dSJacob Faibussowitsch PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 1959566063dSJacob Faibussowitsch PetscCall(VecResetArray(deluxe_ctx->seq_work2[i])); 196ac632422SStefano Zampini } 1979566063dSJacob Faibussowitsch PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 19816e386b8SStefano Zampini if (deluxe_ctx->change) { 19972b8c272SStefano Zampini if (deluxe_ctx->change_with_qr) { 20072b8c272SStefano Zampini Mat change; 20172b8c272SStefano Zampini 2029566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 2039566063dSJacob Faibussowitsch PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 20472b8c272SStefano Zampini } else { 2059566063dSJacob Faibussowitsch PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 20616e386b8SStefano Zampini } 2079566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 2089566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 20972b8c272SStefano Zampini } else { 2109566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 2119566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 21272b8c272SStefano Zampini } 21372b8c272SStefano Zampini } 214674ae819SStefano Zampini } 2153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 216674ae819SStefano Zampini } 217674ae819SStefano Zampini 218d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) 219d71ae5a4SJacob Faibussowitsch { 220674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 221674ae819SStefano Zampini 222674ae819SStefano Zampini PetscFunctionBegin; 223674ae819SStefano Zampini PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 224674ae819SStefano Zampini PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 2); 225674ae819SStefano Zampini PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 3); 22608401ef6SPierre Jolivet PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 227cac4c232SBarry Smith PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector)); 2283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 229674ae819SStefano Zampini } 230674ae819SStefano Zampini 231d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCScalingSetUp(PC pc) 232d71ae5a4SJacob Faibussowitsch { 233674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 234674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 235674ae819SStefano Zampini 236674ae819SStefano Zampini PetscFunctionBegin; 237674ae819SStefano Zampini PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 2389566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 239674ae819SStefano Zampini /* create work vector for the operator */ 2409566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pcbddc->work_scaling)); 2419566063dSJacob Faibussowitsch PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling)); 24234a97f8cSStefano Zampini /* always rebuild pcis->D */ 24328d874f6SStefano Zampini if (pcis->use_stiffness_scaling) { 24415f235b8SStefano Zampini PetscScalar *a; 24515f235b8SStefano Zampini PetscInt i, n; 24615f235b8SStefano Zampini 2479566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N)); 2489566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 2499566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 2509566063dSJacob Faibussowitsch PetscCall(VecAbs(pcis->D)); 2519566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(pcis->D, &n)); 2529566063dSJacob Faibussowitsch PetscCall(VecGetArray(pcis->D, &a)); 2539371c9d4SSatish Balay for (i = 0; i < n; i++) 2549371c9d4SSatish Balay if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0; 2559566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(pcis->D, &a)); 256674ae819SStefano Zampini } 2579566063dSJacob Faibussowitsch PetscCall(VecSet(pcis->vec1_global, 0.0)); 2589566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2599566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2609566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 2619566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 2629566063dSJacob Faibussowitsch PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B)); 263674ae819SStefano Zampini /* now setup */ 264681e7c04SStefano Zampini if (pcbddc->use_deluxe_scaling) { 26548a46eb9SPierre Jolivet if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc)); 2669566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingSetUp_Deluxe(pc)); 2679566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe)); 2689566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe)); 269674ae819SStefano Zampini } else { 2709566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic)); 2719566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic)); 272674ae819SStefano Zampini } 2739566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 27434a97f8cSStefano Zampini 275674ae819SStefano Zampini /* test */ 276674ae819SStefano Zampini if (pcbddc->dbg_flag) { 27772b8c272SStefano Zampini Mat B0_B = NULL; 27872b8c272SStefano Zampini Vec B0_Bv = NULL, B0_Bv2 = NULL; 27934a97f8cSStefano Zampini Vec vec2_global; 280674ae819SStefano Zampini PetscViewer viewer = pcbddc->dbg_viewer; 28134a97f8cSStefano Zampini PetscReal error; 282674ae819SStefano Zampini 283674ae819SStefano Zampini /* extension -> from local to parallel */ 2849566063dSJacob Faibussowitsch PetscCall(VecSet(pcis->vec1_global, 0.0)); 2859566063dSJacob Faibussowitsch PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 2869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2889566063dSJacob Faibussowitsch PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global)); 2899566063dSJacob Faibussowitsch PetscCall(VecCopy(pcis->vec1_global, vec2_global)); 2909566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 2919566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 29272b8c272SStefano Zampini if (pcbddc->benign_n) { 29372b8c272SStefano Zampini IS is_dummy; 29472b8c272SStefano Zampini 2959566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 2969566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 2979566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_dummy)); 2989566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv)); 2999566063dSJacob Faibussowitsch PetscCall(VecDuplicate(B0_Bv, &B0_Bv2)); 3009566063dSJacob Faibussowitsch PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv)); 30172b8c272SStefano Zampini } 3029566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global)); 30372b8c272SStefano Zampini if (pcbddc->benign_saddle_point) { 30472b8c272SStefano Zampini PetscReal errorl = 0.; 3059566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 3069566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 30772b8c272SStefano Zampini if (pcbddc->benign_n) { 3089566063dSJacob Faibussowitsch PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2)); 3099566063dSJacob Faibussowitsch PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2)); 3109566063dSJacob Faibussowitsch PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl)); 31172b8c272SStefano Zampini } 312462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc))); 31363a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error)); 31472b8c272SStefano Zampini } 3159566063dSJacob Faibussowitsch PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global)); 3169566063dSJacob Faibussowitsch PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 31763a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error)); 3189566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vec2_global)); 31934a97f8cSStefano Zampini 320674ae819SStefano Zampini /* restriction -> from parallel to local */ 3219566063dSJacob Faibussowitsch PetscCall(VecSet(pcis->vec1_global, 0.0)); 3229566063dSJacob Faibussowitsch PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 3239566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3249566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3259566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B)); 3269566063dSJacob Faibussowitsch PetscCall(VecScale(pcis->vec1_B, -1.0)); 3279566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3289566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3299566063dSJacob Faibussowitsch PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 33063a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error)); 3319566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B0_B)); 3329566063dSJacob Faibussowitsch PetscCall(VecDestroy(&B0_Bv)); 3339566063dSJacob Faibussowitsch PetscCall(VecDestroy(&B0_Bv2)); 334674ae819SStefano Zampini } 3353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 336674ae819SStefano Zampini } 337674ae819SStefano Zampini 338d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCScalingDestroy(PC pc) 339d71ae5a4SJacob Faibussowitsch { 340674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 341674ae819SStefano Zampini 342674ae819SStefano Zampini PetscFunctionBegin; 3431baa6e33SBarry Smith if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc)); 3449566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pcbddc->work_scaling)); 3459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL)); 3469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL)); 3473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 348674ae819SStefano Zampini } 349674ae819SStefano Zampini 350d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) 351d71ae5a4SJacob Faibussowitsch { 35234a97f8cSStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 35334a97f8cSStefano Zampini PCBDDCDeluxeScaling deluxe_ctx; 35434a97f8cSStefano Zampini 35534a97f8cSStefano Zampini PetscFunctionBegin; 3569566063dSJacob Faibussowitsch PetscCall(PetscNew(&deluxe_ctx)); 35734a97f8cSStefano Zampini pcbddc->deluxe_ctx = deluxe_ctx; 3583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 35934a97f8cSStefano Zampini } 36034a97f8cSStefano Zampini 361d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) 362d71ae5a4SJacob Faibussowitsch { 36334a97f8cSStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 36434a97f8cSStefano Zampini 36534a97f8cSStefano Zampini PetscFunctionBegin; 3669566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx)); 3679566063dSJacob Faibussowitsch PetscCall(PetscFree(pcbddc->deluxe_ctx)); 3683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 36934a97f8cSStefano Zampini } 37034a97f8cSStefano Zampini 371d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) 372d71ae5a4SJacob Faibussowitsch { 37372b8c272SStefano Zampini PetscInt i; 37434a97f8cSStefano Zampini 37534a97f8cSStefano Zampini PetscFunctionBegin; 3769566063dSJacob Faibussowitsch PetscCall(PetscFree(deluxe_ctx->idx_simple_B)); 37734a97f8cSStefano Zampini deluxe_ctx->n_simple = 0; 37872b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 3799566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i])); 3809566063dSJacob Faibussowitsch PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i])); 3819566063dSJacob Faibussowitsch PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i])); 3829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 3839566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 38472b8c272SStefano Zampini } 3859566063dSJacob Faibussowitsch PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum)); 3869566063dSJacob Faibussowitsch PetscCall(PetscFree(deluxe_ctx->workspace)); 38772b8c272SStefano Zampini deluxe_ctx->seq_n = 0; 3883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 38934a97f8cSStefano Zampini } 39034a97f8cSStefano Zampini 391d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) 392d71ae5a4SJacob Faibussowitsch { 39334a97f8cSStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 39434a97f8cSStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 39534a97f8cSStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 396b96c3477SStefano Zampini PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 39734a97f8cSStefano Zampini 39834a97f8cSStefano Zampini PetscFunctionBegin; 39972b8c272SStefano Zampini /* reset data structures if the topology has changed */ 4001baa6e33SBarry Smith if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx)); 40134a97f8cSStefano Zampini 402b1b3d7a2SStefano Zampini /* Compute data structures to solve sequential problems */ 4039566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc)); 4045db18549SStefano Zampini 405b1b3d7a2SStefano Zampini /* diagonal scaling on interface dofs not contained in cc */ 406d62866d3SStefano Zampini if (sub_schurs->is_vertices || sub_schurs->is_dir) { 4071b968477SStefano Zampini PetscInt n_com, n_dir; 4081b968477SStefano Zampini n_com = 0; 40948a46eb9SPierre Jolivet if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com)); 4101b968477SStefano Zampini n_dir = 0; 41148a46eb9SPierre Jolivet if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir)); 41272b8c272SStefano Zampini if (!deluxe_ctx->n_simple) { 4131b968477SStefano Zampini deluxe_ctx->n_simple = n_dir + n_com; 4149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B)); 415d62866d3SStefano Zampini if (sub_schurs->is_vertices) { 4169bb4a8caSStefano Zampini PetscInt nmap; 4179bb4a8caSStefano Zampini const PetscInt *idxs; 4181b968477SStefano Zampini 4199566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 4209566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B)); 42163a3b9bcSJacob Faibussowitsch PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com); 4229566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 4231b968477SStefano Zampini } 424d62866d3SStefano Zampini if (sub_schurs->is_dir) { 4251b968477SStefano Zampini PetscInt nmap; 4261b968477SStefano Zampini const PetscInt *idxs; 4271b968477SStefano Zampini 4289566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs)); 4299566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com)); 43063a3b9bcSJacob Faibussowitsch PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir); 4319566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs)); 4321b968477SStefano Zampini } 4339566063dSJacob Faibussowitsch PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B)); 4349bb4a8caSStefano Zampini } else { 4352472a847SBarry Smith PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple); 43672b8c272SStefano Zampini } 43772b8c272SStefano Zampini } else { 438b1b3d7a2SStefano Zampini deluxe_ctx->n_simple = 0; 4390a545947SLisandro Dalcin deluxe_ctx->idx_simple_B = NULL; 440b1b3d7a2SStefano Zampini } 4413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 44234a97f8cSStefano Zampini } 44334a97f8cSStefano Zampini 444d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) 445d71ae5a4SJacob Faibussowitsch { 4465db18549SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4475db18549SStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 448b96c3477SStefano Zampini PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 44972b8c272SStefano Zampini PetscScalar *matdata, *matdata2; 45072b8c272SStefano Zampini PetscInt i, max_subset_size, cum, cum2; 45172b8c272SStefano Zampini const PetscInt *idxs; 45272b8c272SStefano Zampini PetscBool newsetup = PETSC_FALSE; 4535db18549SStefano Zampini 4545db18549SStefano Zampini PetscFunctionBegin; 45528b400f6SJacob Faibussowitsch PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs"); 4563ba16761SJacob Faibussowitsch if (!sub_schurs->n_subs) PetscFunctionReturn(PETSC_SUCCESS); 4579221af80SStefano Zampini 45872b8c272SStefano Zampini /* Allocate arrays for subproblems */ 45972b8c272SStefano Zampini if (!deluxe_ctx->seq_n) { 46072b8c272SStefano Zampini deluxe_ctx->seq_n = sub_schurs->n_subs; 4619566063dSJacob Faibussowitsch PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum)); 46272b8c272SStefano Zampini newsetup = PETSC_TRUE; 46363a3b9bcSJacob Faibussowitsch } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs); 4646080607fSStefano Zampini 4652f41f7d1SStefano Zampini /* the change of basis is just a reference to sub_schurs->change (if any) */ 4662f41f7d1SStefano Zampini deluxe_ctx->change = sub_schurs->change; 46772b8c272SStefano Zampini deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; 4685db18549SStefano Zampini 46972b8c272SStefano Zampini /* Create objects for deluxe */ 47072b8c272SStefano Zampini max_subset_size = 0; 47172b8c272SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 47272b8c272SStefano Zampini PetscInt subset_size; 4739566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 47472b8c272SStefano Zampini max_subset_size = PetscMax(subset_size, max_subset_size); 47572b8c272SStefano Zampini } 47648a46eb9SPierre Jolivet if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace)); 47772b8c272SStefano Zampini cum = cum2 = 0; 4789566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs)); 4799566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata)); 4809566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2)); 48172b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 48272b8c272SStefano Zampini PetscInt subset_size; 483925dfd53SStefano Zampini 4849566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 48572b8c272SStefano Zampini if (newsetup) { 48672b8c272SStefano Zampini IS sub; 48772b8c272SStefano Zampini /* work vectors */ 4889566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i])); 4899566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i])); 49072b8c272SStefano Zampini 49172b8c272SStefano Zampini /* scatters */ 4929566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub)); 4939566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i])); 4949566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub)); 495839e9adbSstefano_zampini } 49672b8c272SStefano Zampini 49772b8c272SStefano Zampini /* S_E_j */ 4989566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 4999566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i])); 500839e9adbSstefano_zampini 50172b8c272SStefano Zampini /* \sum_k S^k_E_j */ 5029566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 5039566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i])); 5049566063dSJacob Faibussowitsch PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef)); 5059566063dSJacob Faibussowitsch PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian)); 506*79329b78SStefano Zampini switch (sub_schurs->mat_factor_type) { 507*79329b78SStefano Zampini case MAT_FACTOR_CHOLESKY: 5089566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL)); 509*79329b78SStefano Zampini break; 510*79329b78SStefano Zampini case MAT_FACTOR_LU: 5119566063dSJacob Faibussowitsch PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL)); 512*79329b78SStefano Zampini break; 513*79329b78SStefano Zampini default: 514*79329b78SStefano Zampini SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]); 515d62866d3SStefano Zampini } 516839e9adbSstefano_zampini if (pcbddc->deluxe_singlemat) { 517839e9adbSstefano_zampini Mat X, Y; 5183829e7e6SStefano Zampini if (!sub_schurs->is_hermitian) { 5199566063dSJacob Faibussowitsch PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X)); 520839e9adbSstefano_zampini } else { 5219566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i])); 522839e9adbSstefano_zampini X = deluxe_ctx->seq_mat[i]; 523839e9adbSstefano_zampini } 5249566063dSJacob Faibussowitsch PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y)); 5253829e7e6SStefano Zampini if (!sub_schurs->is_hermitian) { 5269566063dSJacob Faibussowitsch PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 527839e9adbSstefano_zampini } else { 5289566063dSJacob Faibussowitsch PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 529839e9adbSstefano_zampini } 5309566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 5319566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 5329566063dSJacob Faibussowitsch PetscCall(MatDestroy(&X)); 5332f41f7d1SStefano Zampini if (deluxe_ctx->change) { 5342f41f7d1SStefano Zampini Mat C, CY; 53528b400f6SJacob Faibussowitsch PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis"); 5369566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL)); 537fb842aefSJose E. Roman PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_CURRENT, &CY)); 538fb842aefSJose E. Roman PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y)); 5399566063dSJacob Faibussowitsch PetscCall(MatDestroy(&CY)); 5409566063dSJacob Faibussowitsch PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */ 5412f41f7d1SStefano Zampini } 5429566063dSJacob Faibussowitsch PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y)); 543839e9adbSstefano_zampini deluxe_ctx->seq_mat[i] = Y; 54472b8c272SStefano Zampini } 54572b8c272SStefano Zampini cum += subset_size; 54672b8c272SStefano Zampini cum2 += subset_size * subset_size; 54772b8c272SStefano Zampini } 5489566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs)); 5499566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata)); 5509566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2)); 5512f41f7d1SStefano Zampini if (pcbddc->deluxe_singlemat) { 5522f41f7d1SStefano Zampini deluxe_ctx->change = NULL; 5532f41f7d1SStefano Zampini deluxe_ctx->change_with_qr = PETSC_FALSE; 5542f41f7d1SStefano Zampini } 5555db18549SStefano Zampini 55672b8c272SStefano Zampini if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { 55772b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 55872b8c272SStefano Zampini if (newsetup) { 55972b8c272SStefano Zampini PC pc; 56072b8c272SStefano Zampini 5619566063dSJacob Faibussowitsch PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc)); 5629566063dSJacob Faibussowitsch PetscCall(PCSetType(pc, PCLU)); 5639566063dSJacob Faibussowitsch PetscCall(KSPSetFromOptions(deluxe_ctx->change[i])); 56472b8c272SStefano Zampini } 5659566063dSJacob Faibussowitsch PetscCall(KSPSetUp(deluxe_ctx->change[i])); 56672b8c272SStefano Zampini } 56716e386b8SStefano Zampini } 5683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5695db18549SStefano Zampini } 570