static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\
                      Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n";

/*
  Example:
    mpiexec -n <np> ./ex33 -mem_view -matproduct_batch_size <Bbn>
*/

#include <petsc.h>

PetscErrorCode Print_memory(PetscLogDouble mem)
{
  double max_mem, min_mem;

  PetscFunctionBeginUser;
  PetscCallMPI(MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD));
  PetscCallMPI(MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD));
  max_mem = max_mem / 1024.0 / 1024.0;
  min_mem = min_mem / 1024.0 / 1024.0;
  PetscCall(PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f Mb, %.4f Mb.\n", (double)max_mem, (double)min_mem));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Illustrate how to use MPI derived data types.
  This can save significant memory. See MatMPIDenseScatter().
*/
PetscErrorCode TestMPIDerivedDataType(void)
{
  MPI_Datatype type1, type2, rtype1, rtype2;
  PetscInt     i, j;
  PetscScalar  buffer[24]; /* An array of 4 rows, 6 cols */
  MPI_Status   status;
  PetscMPIInt  rank, size, disp[2];

  PetscFunctionBeginUser;
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
  PetscCheck(size >= 2, PETSC_COMM_SELF, PETSC_ERR_WRONG_MPI_SIZE, "Must use at least 2 processors");
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));

  if (rank == 0) {
    /* proc[0] sends 2 rows to proc[1] */
    for (i = 0; i < 24; i++) buffer[i] = (PetscScalar)i;

    disp[0] = 0;
    disp[1] = 2;
    PetscCallMPI(MPI_Type_create_indexed_block(2, 1, (const PetscMPIInt *)disp, MPIU_SCALAR, &type1));
    /* one column has 4 entries */
    PetscCallMPI(MPI_Type_create_resized(type1, 0, 4 * sizeof(PetscScalar), &type2));
    PetscCallMPI(MPI_Type_commit(&type2));
    PetscCallMPI(MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD));

  } else if (rank == 1) {
    /* proc[1] receives 2 rows from proc[0] and puts them into contiguous rows, starting at row 1 (disp[0]) */
    for (i = 0; i < 24; i++) buffer[i] = 0.0;

    disp[0] = 1;
    PetscCallMPI(MPI_Type_create_indexed_block(1, 2, (const PetscMPIInt *)disp, MPIU_SCALAR, &rtype1));
    PetscCallMPI(MPI_Type_create_resized(rtype1, 0, 4 * sizeof(PetscScalar), &rtype2));

    PetscCallMPI(MPI_Type_commit(&rtype2));
    PetscCallMPI(MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status));
    for (i = 0; i < 4; i++) {
      for (j = 0; j < 6; j++) PetscCall(PetscPrintf(MPI_COMM_SELF, " %g", (double)PetscRealPart(buffer[i + j * 4])));
      PetscCall(PetscPrintf(MPI_COMM_SELF, "\n"));
    }
  }

  if (rank == 0) {
    PetscCallMPI(MPI_Type_free(&type1));
    PetscCallMPI(MPI_Type_free(&type2));
  } else if (rank == 1) {
    PetscCallMPI(MPI_Type_free(&rtype1));
    PetscCallMPI(MPI_Type_free(&rtype2));
  }
  PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
  PetscFunctionReturn(PETSC_SUCCESS);
}

int main(int argc, char **args)
{
  PetscInt mA = 2700, nX = 80, nz = 40;
  /* PetscInt mA = 6, nX = 5, nz = 2; // small test */
  PetscLogDouble mem;
  Mat            A, X, Y;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_mpiderivedtype", &flg, NULL));
  if (flg) {
    PetscCall(TestMPIDerivedDataType());
    PetscCall(PetscFinalize());
    return 0;
  }

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mem_view", &flg, NULL));
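  /* Record baseline memory use before any matrices are created; reported only when -mem_view is given */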
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "Before start,"));
    PetscCall(Print_memory(mem));
  }

  PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, mA, nz, NULL, nz, NULL, &A));
  PetscCall(MatSetRandom(A, NULL));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating A,"));
    PetscCall(Print_memory(mem));
  }

  PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, nX, NULL, &X));
  PetscCall(MatSetRandom(X, NULL));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating X,"));
    PetscCall(Print_memory(mem));
  }

  PetscCall(MatMatMult(A, X, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &Y));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,"));
    PetscCall(Print_memory(mem));
  }

  /* Test reuse */
  PetscCall(MatMatMult(A, X, MAT_REUSE_MATRIX, PETSC_DETERMINE, &Y));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,"));
    PetscCall(Print_memory(mem));
  }

  /* Check accuracy */
  PetscCall(MatMatMultEqual(A, X, Y, 10, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMETYPE, "Error in MatMatMult()");

  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&X));
  PetscCall(MatDestroy(&Y));

  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      suffix: 1
      nsize: 4
      output_file: output/empty.out

   test:
      suffix: 2
      nsize: 8
      output_file: output/empty.out

   test:
      suffix: 3
      nsize: 2
      args: -test_mpiderivedtype
      output_file: output/ex33_3.out

TEST*/