1 static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n" 2 "This test can only be run in parallel.\n" 3 "\n"; 4 5 #include <petscmat.h> 6 7 PetscErrorCode MyISView(IS *rowis, IS *colis, PetscInt gs, PetscInt ss, PetscViewer viewer) 8 { 9 PetscViewer subviewer = NULL; 10 11 PetscFunctionBeginUser; 12 PetscCall(PetscViewerASCIIPrintf(viewer, "Row IS %" PetscInt_FMT "\n", gs)); 13 if (ss > -1) { 14 PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 15 PetscCall(ISView(rowis[ss], subviewer)); 16 PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 17 } 18 PetscCall(PetscViewerFlush(viewer)); 19 PetscCall(PetscViewerASCIIPrintf(viewer, "Col IS %" PetscInt_FMT "\n", gs)); 20 if (ss > -1) { 21 PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 22 PetscCall(ISView(colis[ss], subviewer)); 23 PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 24 } 25 PetscCall(PetscViewerFlush(viewer)); 26 PetscFunctionReturn(PETSC_SUCCESS); 27 } 28 29 int main(int argc, char **args) 30 { 31 Mat A, *submats; 32 MPI_Comm subcomm; 33 PetscMPIInt rank, size, subrank, subsize, color; 34 PetscInt m, n, N, bs, rstart, rend, i, j, k, total_subdomains, hash, nsubdomains = 1; 35 PetscInt nis, *cols, gnsubdomains, gsubdomainnums[1], gsubdomainperm[1], s, gs; 36 PetscInt *rowindices, *colindices, idx, rep; 37 PetscScalar *vals; 38 IS rowis[1], colis[1]; 39 PetscViewer viewer; 40 PetscBool permute_indices, flg; 41 42 PetscFunctionBeginUser; 43 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 44 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 45 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 46 47 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "ex183", "Mat"); 48 m = 5; 49 PetscCall(PetscOptionsInt("-m", "Local matrix size", "MatSetSizes", m, &m, &flg)); 50 total_subdomains = size - 1; 51 PetscCall(PetscOptionsInt("-total_subdomains", "Number of submatrices where 0 < n < comm size", "MatCreateSubMatricesMPI", total_subdomains, &total_subdomains, &flg)); 52 permute_indices = PETSC_FALSE; 53 PetscCall(PetscOptionsBool("-permute_indices", "Whether to permute indices before breaking them into subdomains", "ISCreateGeneral", permute_indices, &permute_indices, &flg)); 54 hash = 7; 55 PetscCall(PetscOptionsInt("-hash", "Permutation factor, which has to be relatively prime to M = size*m (total matrix size)", "ISCreateGeneral", hash, &hash, &flg)); 56 rep = 2; 57 PetscCall(PetscOptionsInt("-rep", "Number of times to carry out submatrix extractions; currently only 1 & 2 are supported", NULL, rep, &rep, &flg)); 58 PetscOptionsEnd(); 59 60 PetscCheck(total_subdomains <= size, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Number of subdomains %" PetscInt_FMT " must not exceed comm size %d", total_subdomains, size); 61 PetscCheck(total_subdomains >= 1 && total_subdomains <= size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "number of subdomains must be > 0 and <= %d (comm size), got total_subdomains = %" PetscInt_FMT, size, total_subdomains); 62 PetscCheck(rep == 1 || rep == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid number of test repetitions: %" PetscInt_FMT "; must be 1 or 2", rep); 63 64 viewer = PETSC_VIEWER_STDOUT_WORLD; 65 /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */ 66 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 67 PetscCall(MatSetSizes(A, m, m, PETSC_DECIDE, PETSC_DECIDE)); 68 PetscCall(MatSetFromOptions(A)); 69 PetscCall(MatSetUp(A)); 70 PetscCall(MatGetSize(A, NULL, &N)); 71 PetscCall(MatGetLocalSize(A, NULL, &n)); 72 PetscCall(MatGetBlockSize(A, &bs)); 73 PetscCall(MatSeqAIJSetPreallocation(A, n, NULL)); 74 PetscCall(MatMPIAIJSetPreallocation(A, n, NULL, N - n, NULL)); 75 PetscCall(MatSeqBAIJSetPreallocation(A, bs, n / bs, NULL)); 76 PetscCall(MatMPIBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL)); 77 PetscCall(MatSeqSBAIJSetPreallocation(A, bs, n / bs, NULL)); 78 PetscCall(MatMPISBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL)); 79 80 PetscCall(PetscMalloc2(N, &cols, N, &vals)); 81 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 82 for (j = 0; j < N; ++j) cols[j] = j; 83 for (i = rstart; i < rend; i++) { 84 for (j = 0; j < N; ++j) vals[j] = i * 10000 + j; 85 PetscCall(MatSetValues(A, 1, &i, N, cols, vals, INSERT_VALUES)); 86 } 87 PetscCall(PetscFree2(cols, vals)); 88 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 89 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 90 91 PetscCall(PetscViewerASCIIPrintf(viewer, "Initial matrix:\n")); 92 PetscCall(MatView(A, viewer)); 93 94 /* 95 Create subcomms and ISs so that each rank participates in one IS. 96 The IS either coalesces adjacent rank indices (contiguous), 97 or selects indices by scrambling them using a hash. 98 */ 99 k = size / total_subdomains + (size % total_subdomains > 0); /* There are up to k ranks to a color */ 100 color = rank / k; 101 PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD, color, rank, &subcomm)); 102 PetscCallMPI(MPI_Comm_size(subcomm, &subsize)); 103 PetscCallMPI(MPI_Comm_rank(subcomm, &subrank)); 104 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 105 nis = 1; 106 PetscCall(PetscMalloc2(rend - rstart, &rowindices, rend - rstart, &colindices)); 107 108 for (j = rstart; j < rend; ++j) { 109 if (permute_indices) { 110 idx = (j * hash); 111 } else { 112 idx = j; 113 } 114 rowindices[j - rstart] = idx % N; 115 colindices[j - rstart] = (idx + m) % N; 116 } 117 PetscCall(ISCreateGeneral(subcomm, rend - rstart, rowindices, PETSC_COPY_VALUES, &rowis[0])); 118 PetscCall(ISCreateGeneral(subcomm, rend - rstart, colindices, PETSC_COPY_VALUES, &colis[0])); 119 PetscCall(ISSort(rowis[0])); 120 PetscCall(ISSort(colis[0])); 121 PetscCall(PetscFree2(rowindices, colindices)); 122 /* 123 Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms, 124 we need to obtain the global numbers of our local objects and wait for the corresponding global 125 number to be viewed. 126 */ 127 PetscCall(PetscViewerASCIIPrintf(viewer, "Subdomains")); 128 if (permute_indices) PetscCall(PetscViewerASCIIPrintf(viewer, " (hash=%" PetscInt_FMT ")", hash)); 129 PetscCall(PetscViewerASCIIPrintf(viewer, ":\n")); 130 PetscCall(PetscViewerFlush(viewer)); 131 132 nsubdomains = 1; 133 for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 134 PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)rowis, &gnsubdomains, gsubdomainnums)); 135 PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 136 for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 137 PetscInt ss; 138 if (s < nsubdomains) { 139 ss = gsubdomainperm[s]; 140 if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 141 ++s; 142 } else ss = -1; 143 } else ss = -1; 144 PetscCall(MyISView(rowis, colis, gs, ss, viewer)); 145 } 146 PetscCall(PetscViewerFlush(viewer)); 147 PetscCall(ISSort(rowis[0])); 148 PetscCall(ISSort(colis[0])); 149 nsubdomains = 1; 150 PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_INITIAL_MATRIX, &submats)); 151 /* 152 Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 153 we need to obtain the global numbers of our local objects and wait for the corresponding global 154 number to be viewed. Also all MPI processes need to call PetscViewerGetSubViewer() the same number of times 155 */ 156 PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 1):\n")); 157 for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 158 PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums)); 159 PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 160 for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 161 PetscViewer subviewer = NULL; 162 if (s < nsubdomains) { 163 PetscInt ss; 164 ss = gsubdomainperm[s]; 165 if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 166 PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 167 PetscCall(MatView(submats[ss], subviewer)); 168 PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 169 ++s; 170 } else { 171 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &subviewer)); 172 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &subviewer)); 173 } 174 } else { 175 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &subviewer)); 176 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &subviewer)); 177 } 178 } 179 if (rep == 1) goto cleanup; 180 nsubdomains = 1; 181 PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_REUSE_MATRIX, &submats)); 182 /* 183 Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 184 we need to obtain the global numbers of our local objects and wait for the corresponding global 185 number to be viewed. 186 */ 187 PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 2):\n")); 188 for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 189 PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums)); 190 PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 191 for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 192 PetscViewer subviewer = NULL; 193 if (s < nsubdomains) { 194 PetscInt ss; 195 ss = gsubdomainperm[s]; 196 if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 197 PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 198 PetscCall(MatView(submats[ss], subviewer)); 199 PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 200 ++s; 201 } else { 202 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &subviewer)); 203 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &subviewer)); 204 } 205 } else { 206 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &subviewer)); 207 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &subviewer)); 208 } 209 } 210 cleanup: 211 for (k = 0; k < nsubdomains; ++k) PetscCall(MatDestroy(submats + k)); 212 PetscCall(PetscFree(submats)); 213 for (k = 0; k < nis; ++k) { 214 PetscCall(ISDestroy(rowis + k)); 215 PetscCall(ISDestroy(colis + k)); 216 } 217 PetscCall(MatDestroy(&A)); 218 PetscCallMPI(MPI_Comm_free(&subcomm)); 219 PetscCall(PetscFinalize()); 220 return 0; 221 } 222 223 /*TEST 224 225 test: 226 nsize: 2 227 args: -total_subdomains 1 228 output_file: output/ex183_2_1.out 229 230 test: 231 suffix: 2 232 nsize: 3 233 args: -total_subdomains 2 234 output_file: output/ex183_3_2.out 235 236 test: 237 suffix: 3 238 nsize: 4 239 args: -total_subdomains 2 240 output_file: output/ex183_4_2.out 241 242 test: 243 suffix: 4 244 nsize: 6 245 args: -total_subdomains 2 246 output_file: output/ex183_6_2.out 247 248 TEST*/ 249