1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 28 /* MatMult_SeqFFTW performs forward DFT in parallel 29 Input parameter: 30 A - the matrix 31 x - the vector on which FDFT will be performed 32 33 Output parameter: 34 y - vector that stores result of FDFT 35 */ 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatMult_SeqFFTW" 38 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39 { 40 PetscErrorCode ierr; 41 Mat_FFT *fft = (Mat_FFT*)A->data; 42 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43 PetscScalar *x_array,*y_array; 44 PetscInt ndim=fft->ndim,*dim=fft->dim; 45 46 PetscFunctionBegin; 47 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 48 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 49 if (!fftw->p_forward) { /* create a plan, then excute it */ 50 switch (ndim) { 51 case 1: 52 #if defined(PETSC_USE_COMPLEX) 53 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 #else 55 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 56 #endif 57 break; 58 
case 2: 59 #if defined(PETSC_USE_COMPLEX) 60 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 61 #else 62 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 63 #endif 64 break; 65 case 3: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 default: 73 #if defined(PETSC_USE_COMPLEX) 74 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 75 #else 76 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 77 #endif 78 break; 79 } 80 fftw->finarray = x_array; 81 fftw->foutarray = y_array; 82 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 83 planning should be done before x is initialized! 
See FFTW manual sec2.1 or sec4 */ 84 fftw_execute(fftw->p_forward); 85 } else { /* use existing plan */ 86 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 87 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 88 } else { 89 fftw_execute(fftw->p_forward); 90 } 91 } 92 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 93 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 /* MatMultTranspose_SeqFFTW performs serial backward DFT 98 Input parameter: 99 A - the matrix 100 x - the vector on which BDFT will be performed 101 102 Output parameter: 103 y - vector that stores result of BDFT 104 */ 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 108 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 109 { 110 PetscErrorCode ierr; 111 Mat_FFT *fft = (Mat_FFT*)A->data; 112 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 113 PetscScalar *x_array,*y_array; 114 PetscInt ndim=fft->ndim,*dim=fft->dim; 115 116 PetscFunctionBegin; 117 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 118 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 119 if (!fftw->p_backward) { /* create a plan, then excute it */ 120 switch (ndim) { 121 case 1: 122 #if defined(PETSC_USE_COMPLEX) 123 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 124 #else 125 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 126 #endif 127 break; 128 case 2: 129 #if defined(PETSC_USE_COMPLEX) 130 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 131 #else 132 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 133 #endif 134 break; 135 case 3: 136 #if defined(PETSC_USE_COMPLEX) 137 fftw->p_backward = 
fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 138 #else 139 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 140 #endif 141 break; 142 default: 143 #if defined(PETSC_USE_COMPLEX) 144 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 145 #else 146 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 147 #endif 148 break; 149 } 150 fftw->binarray = x_array; 151 fftw->boutarray = y_array; 152 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 153 } else { /* use existing plan */ 154 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 155 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 156 } else { 157 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 158 } 159 } 160 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 161 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 /* MatMult_MPIFFTW performs forward DFT in parallel 166 Input parameter: 167 A - the matrix 168 x - the vector on which FDFT will be performed 169 170 Output parameter: 171 y - vector that stores result of FDFT 172 */ 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatMult_MPIFFTW" 175 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 176 { 177 PetscErrorCode ierr; 178 Mat_FFT *fft = (Mat_FFT*)A->data; 179 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 180 PetscScalar *x_array,*y_array; 181 PetscInt ndim=fft->ndim,*dim=fft->dim; 182 MPI_Comm comm; 183 184 PetscFunctionBegin; 185 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 186 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 187 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 188 if (!fftw->p_forward) { /* create a plan, then excute it */ 189 switch (ndim) { 190 case 1: 191 #if 
defined(PETSC_USE_COMPLEX) 192 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 193 #else 194 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 195 #endif 196 break; 197 case 2: 198 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 199 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 200 #else 201 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 202 #endif 203 break; 204 case 3: 205 #if defined(PETSC_USE_COMPLEX) 206 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 207 #else 208 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 209 #endif 210 break; 211 default: 212 #if defined(PETSC_USE_COMPLEX) 213 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 214 #else 215 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 216 #endif 217 break; 218 } 219 fftw->finarray = x_array; 220 fftw->foutarray = y_array; 221 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 222 planning should be done before x is initialized! 
See FFTW manual sec2.1 or sec4 */ 223 fftw_execute(fftw->p_forward); 224 } else { /* use existing plan */ 225 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 226 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 227 } else { 228 fftw_execute(fftw->p_forward); 229 } 230 } 231 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 232 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 233 PetscFunctionReturn(0); 234 } 235 236 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 237 Input parameter: 238 A - the matrix 239 x - the vector on which BDFT will be performed 240 241 Output parameter: 242 y - vector that stores result of BDFT 243 */ 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 246 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 247 { 248 PetscErrorCode ierr; 249 Mat_FFT *fft = (Mat_FFT*)A->data; 250 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 251 PetscScalar *x_array,*y_array; 252 PetscInt ndim=fft->ndim,*dim=fft->dim; 253 MPI_Comm comm; 254 255 PetscFunctionBegin; 256 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 257 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 258 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 259 if (!fftw->p_backward) { /* create a plan, then excute it */ 260 switch (ndim) { 261 case 1: 262 #if defined(PETSC_USE_COMPLEX) 263 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 264 #else 265 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 266 #endif 267 break; 268 case 2: 269 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 270 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 271 #else 272 fftw->p_backward = 
fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 273 #endif 274 break; 275 case 3: 276 #if defined(PETSC_USE_COMPLEX) 277 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 278 #else 279 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 280 #endif 281 break; 282 default: 283 #if defined(PETSC_USE_COMPLEX) 284 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 285 #else 286 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 287 #endif 288 break; 289 } 290 fftw->binarray = x_array; 291 fftw->boutarray = y_array; 292 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 293 } else { /* use existing plan */ 294 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 295 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 296 } else { 297 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 298 } 299 } 300 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 301 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatDestroy_FFTW" 307 PetscErrorCode MatDestroy_FFTW(Mat A) 308 { 309 Mat_FFT *fft = (Mat_FFT*)A->data; 310 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 311 PetscErrorCode ierr; 312 313 PetscFunctionBegin; 314 fftw_destroy_plan(fftw->p_forward); 315 fftw_destroy_plan(fftw->p_backward); 316 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 317 ierr = PetscFree(fft->data);CHKERRQ(ierr); 318 fftw_mpi_cleanup(); 319 PetscFunctionReturn(0); 320 } 321 322 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 323 #undef __FUNCT__ 324 
#define __FUNCT__ "VecDestroy_MPIFFTW" 325 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 326 { 327 PetscErrorCode ierr; 328 PetscScalar *array; 329 330 PetscFunctionBegin; 331 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 332 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 333 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 334 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 335 PetscFunctionReturn(0); 336 } 337 338 #undef __FUNCT__ 339 #define __FUNCT__ "MatGetVecsFFTW" 340 /*@ 341 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 342 parallel layout determined by FFTW 343 344 Collective on Mat 345 346 Input Parameter: 347 . A - the matrix 348 349 Output Parameter: 350 + x - (optional) input vector of forward FFTW 351 - y - (optional) output vector of forward FFTW 352 - z - (optional) output vector of backward FFTW 353 354 Level: advanced 355 356 Note: The parallel layout of output of forward FFTW is always same as the input 357 of the backward FFTW. But parallel layout of the input vector of forward 358 FFTW might not be same as the output of backward FFTW. 359 Also note that we need to provide enough space while doing parallel real transform. 360 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 361 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 362 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 363 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 364 zeros if it is odd only one zero is needed. 365 Lastly one needs some scratch space at the end of data set in each process. alloc_local 366 figures out how much space is needed, i.e. it figures out the data+scratch space for 367 each processor and returns that. 

   .seealso: MatCreateFFTW()
@*/
PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered as "MatGetVecsFFTW_C" */
  ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetVecsFFTW_FFTW"
/* Creates the work vectors (forward input fin, forward output fout, backward output bout)
   with the data layout FFTW expects.  In parallel the data arrays are allocated with
   fftw_malloc() (sized by fftw_mpi_local_size*, which includes per-process scratch space)
   and each vector's destroy op is redirected to VecDestroy_MPIFFTW so the arrays are
   released with fftw_free(). */
PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N = fft->N;
  PetscInt       ndim = fft->ndim,*dim=fft->dim,n=fft->n;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case: plain Seq vectors, no FFTW-managed storage needed */
#if defined(PETSC_USE_COMPLEX)
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double    *data_finr,*data_boutr;
    PetscInt  n1,N1;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      /* forward and backward 1D transforms can have different local layouts,
         hence the two separate fftw_mpi_local_size_1d() queries below */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
      /* real 2D: last dimension is padded to 2*(dim[1]/2+1) doubles (r2c storage) */
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      /* real 3D: same r2c padding scheme as 2D, applied to the last dimension */
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      /* temporarily replace the last FFTW dimension by its r2c size, query the local
         size, then restore it -- dim_fftw must keep its logical value for later calls */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "VecScatterPetscToFFTW"
/*@
   VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

   Options Database Keys:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This is required even when
   one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
   zeros. This routine does that job by scattering operation.
620 621 .seealso: VecScatterFFTWToPetsc() 622 @*/ 623 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 624 { 625 PetscErrorCode ierr; 626 627 PetscFunctionBegin; 628 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 629 PetscFunctionReturn(0); 630 } 631 632 EXTERN_C_BEGIN 633 #undef __FUNCT__ 634 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 635 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 636 { 637 PetscErrorCode ierr; 638 MPI_Comm comm; 639 Mat_FFT *fft = (Mat_FFT*)A->data; 640 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 641 PetscInt N =fft->N; 642 PetscInt ndim =fft->ndim,*dim=fft->dim; 643 PetscInt low; 644 PetscInt rank,size,vsize,vsize1; 645 ptrdiff_t alloc_local,local_n0,local_0_start; 646 ptrdiff_t local_n1,local_1_start; 647 VecScatter vecscat; 648 IS list1,list2; 649 #if !defined(PETSC_USE_COMPLEX) 650 PetscInt i,j,k,partial_dim; 651 PetscInt *indx1, *indx2, tempindx, tempindx1; 652 PetscInt N1, n1,NM; 653 ptrdiff_t temp; 654 #endif 655 656 PetscFunctionBegin; 657 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 658 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 659 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 660 ierr = VecGetOwnershipRange(y,&low,NULL); 661 662 if (size==1) { 663 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 664 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 665 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 666 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 667 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 669 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 670 ierr = ISDestroy(&list1);CHKERRQ(ierr); 671 } else { 672 switch (ndim) { 673 case 1: 674 #if defined(PETSC_USE_COMPLEX) 675 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 
676 677 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 678 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 679 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 680 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 681 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 682 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 683 ierr = ISDestroy(&list1);CHKERRQ(ierr); 684 ierr = ISDestroy(&list2);CHKERRQ(ierr); 685 #else 686 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 687 #endif 688 break; 689 case 2: 690 #if defined(PETSC_USE_COMPLEX) 691 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 692 693 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 694 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 695 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 696 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 697 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 698 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 699 ierr = ISDestroy(&list1);CHKERRQ(ierr); 700 ierr = ISDestroy(&list2);CHKERRQ(ierr); 701 #else 702 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 703 704 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 705 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 706 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 707 708 if (dim[1]%2==0) { 709 NM = dim[1]+2; 710 } else { 711 NM = dim[1]+1; 712 } 713 for (i=0; i<local_n0; i++) { 714 for (j=0; j<dim[1]; j++) { 715 tempindx = i*dim[1] + j; 716 tempindx1 = i*NM + j; 717 718 indx1[tempindx]=local_0_start*dim[1]+tempindx; 719 indx2[tempindx]=low+tempindx1; 720 } 721 } 722 723 ierr = 
ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 724 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 725 726 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 727 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 728 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 729 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 730 ierr = ISDestroy(&list1);CHKERRQ(ierr); 731 ierr = ISDestroy(&list2);CHKERRQ(ierr); 732 ierr = PetscFree(indx1);CHKERRQ(ierr); 733 ierr = PetscFree(indx2);CHKERRQ(ierr); 734 #endif 735 break; 736 737 case 3: 738 #if defined(PETSC_USE_COMPLEX) 739 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 740 741 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 742 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 743 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 744 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 745 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 746 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 747 ierr = ISDestroy(&list1);CHKERRQ(ierr); 748 ierr = ISDestroy(&list2);CHKERRQ(ierr); 749 #else 750 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 751 752 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 753 754 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 755 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 756 757 if (dim[2]%2==0) NM = dim[2]+2; 758 else NM = dim[2]+1; 759 760 for (i=0; i<local_n0; i++) { 761 for (j=0; j<dim[1]; j++) { 762 for (k=0; k<dim[2]; k++) { 763 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 764 
tempindx1 = i*dim[1]*NM + j*NM + k; 765 766 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 767 indx2[tempindx]=low+tempindx1; 768 } 769 } 770 } 771 772 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 773 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 774 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 775 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 776 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 777 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 778 ierr = ISDestroy(&list1);CHKERRQ(ierr); 779 ierr = ISDestroy(&list2);CHKERRQ(ierr); 780 ierr = PetscFree(indx1);CHKERRQ(ierr); 781 ierr = PetscFree(indx2);CHKERRQ(ierr); 782 #endif 783 break; 784 785 default: 786 #if defined(PETSC_USE_COMPLEX) 787 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 788 789 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 790 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 791 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 792 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 793 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 794 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 795 ierr = ISDestroy(&list1);CHKERRQ(ierr); 796 ierr = ISDestroy(&list2);CHKERRQ(ierr); 797 #else 798 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 799 800 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 801 802 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 803 804 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 805 806 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 807 808 partial_dim = fftw->partial_dim; 809 
810 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 811 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 812 813 if (dim[ndim-1]%2==0) NM = 2; 814 else NM = 1; 815 816 j = low; 817 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 818 indx1[i] = local_0_start*partial_dim + i; 819 indx2[i] = j; 820 if (k%dim[ndim-1]==0) j+=NM; 821 j++; 822 } 823 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 824 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 825 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 826 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 827 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 829 ierr = ISDestroy(&list1);CHKERRQ(ierr); 830 ierr = ISDestroy(&list2);CHKERRQ(ierr); 831 ierr = PetscFree(indx1);CHKERRQ(ierr); 832 ierr = PetscFree(indx2);CHKERRQ(ierr); 833 #endif 834 break; 835 } 836 } 837 PetscFunctionReturn(0); 838 } 839 EXTERN_C_END 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "VecScatterFFTWToPetsc" 843 /*@ 844 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 845 846 Collective on Mat 847 848 Input Parameters: 849 + A - FFTW matrix 850 - x - FFTW vector 851 852 Output Parameters: 853 . y - PETSc vector 854 855 Level: intermediate 856 857 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 858 VecScatterFFTWToPetsc removes those extra zeros. 
859 860 .seealso: VecScatterPetscToFFTW() 861 @*/ 862 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 863 { 864 PetscErrorCode ierr; 865 866 PetscFunctionBegin; 867 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 EXTERN_C_BEGIN 872 #undef __FUNCT__ 873 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 874 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 875 { 876 PetscErrorCode ierr; 877 MPI_Comm comm; 878 Mat_FFT *fft = (Mat_FFT*)A->data; 879 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 880 PetscInt N = fft->N; 881 PetscInt ndim = fft->ndim,*dim=fft->dim; 882 PetscInt low; 883 PetscInt rank,size; 884 ptrdiff_t alloc_local,local_n0,local_0_start; 885 ptrdiff_t local_n1,local_1_start; 886 VecScatter vecscat; 887 IS list1,list2; 888 #if !defined(PETSC_USE_COMPLEX) 889 PetscInt i,j,k,partial_dim; 890 PetscInt *indx1, *indx2, tempindx, tempindx1; 891 PetscInt N1, n1,NM; 892 ptrdiff_t temp; 893 #endif 894 895 PetscFunctionBegin; 896 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 897 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 898 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 899 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 900 901 if (size==1) { 902 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 903 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 904 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 907 ierr = ISDestroy(&list1);CHKERRQ(ierr); 908 909 } else { 910 switch (ndim) { 911 case 1: 912 #if defined(PETSC_USE_COMPLEX) 913 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 914 915 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 916 ierr = 
ISCreateStride(comm,local_n1,low,1,&list2); 917 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 918 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 919 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 920 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 921 ierr = ISDestroy(&list1);CHKERRQ(ierr); 922 ierr = ISDestroy(&list2);CHKERRQ(ierr); 923 #else 924 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 925 #endif 926 break; 927 case 2: 928 #if defined(PETSC_USE_COMPLEX) 929 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 930 931 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 932 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 933 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 934 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 935 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 937 ierr = ISDestroy(&list1);CHKERRQ(ierr); 938 ierr = ISDestroy(&list2);CHKERRQ(ierr); 939 #else 940 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 941 942 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 943 944 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 945 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 946 947 if (dim[1]%2==0) NM = dim[1]+2; 948 else NM = dim[1]+1; 949 950 for (i=0; i<local_n0; i++) { 951 for (j=0; j<dim[1]; j++) { 952 tempindx = i*dim[1] + j; 953 tempindx1 = i*NM + j; 954 955 indx1[tempindx]=local_0_start*dim[1]+tempindx; 956 indx2[tempindx]=low+tempindx1; 957 } 958 } 959 960 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 961 ierr = 
ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 962 963 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 964 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 966 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 967 ierr = ISDestroy(&list1);CHKERRQ(ierr); 968 ierr = ISDestroy(&list2);CHKERRQ(ierr); 969 ierr = PetscFree(indx1);CHKERRQ(ierr); 970 ierr = PetscFree(indx2);CHKERRQ(ierr); 971 #endif 972 break; 973 case 3: 974 #if defined(PETSC_USE_COMPLEX) 975 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 976 977 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 978 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 979 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 980 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 983 ierr = ISDestroy(&list1);CHKERRQ(ierr); 984 ierr = ISDestroy(&list2);CHKERRQ(ierr); 985 #else 986 987 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 988 989 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 990 991 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 992 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 993 994 if (dim[2]%2==0) NM = dim[2]+2; 995 else NM = dim[2]+1; 996 997 for (i=0; i<local_n0; i++) { 998 for (j=0; j<dim[1]; j++) { 999 for (k=0; k<dim[2]; k++) { 1000 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1001 tempindx1 = i*dim[1]*NM + j*NM + k; 1002 1003 
indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1004 indx2[tempindx]=low+tempindx1; 1005 } 1006 } 1007 } 1008 1009 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1010 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1011 1012 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1013 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1015 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1016 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1017 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1018 ierr = PetscFree(indx1);CHKERRQ(ierr); 1019 ierr = PetscFree(indx2);CHKERRQ(ierr); 1020 #endif 1021 break; 1022 default: 1023 #if defined(PETSC_USE_COMPLEX) 1024 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1025 1026 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1027 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1028 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1029 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1032 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1033 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1034 #else 1035 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1036 1037 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1038 1039 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1040 1041 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1042 1043 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1044 1045 partial_dim = fftw->partial_dim; 1046 
1047 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1048 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1049 1050 if (dim[ndim-1]%2==0) NM = 2; 1051 else NM = 1; 1052 1053 j = low; 1054 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1055 indx1[i] = local_0_start*partial_dim + i; 1056 indx2[i] = j; 1057 if (k%dim[ndim-1]==0) j+=NM; 1058 j++; 1059 } 1060 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1061 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1062 1063 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1064 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1065 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1066 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1067 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1068 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1069 ierr = PetscFree(indx1);CHKERRQ(ierr); 1070 ierr = PetscFree(indx2);CHKERRQ(ierr); 1071 #endif 1072 break; 1073 } 1074 } 1075 PetscFunctionReturn(0); 1076 } 1077 EXTERN_C_END 1078 1079 EXTERN_C_BEGIN 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatCreate_FFTW" 1082 /* 1083 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1084 1085 Options Database Keys: 1086 + -mat_fftw_plannerflags - set FFTW planner flags 1087 1088 Level: intermediate 1089 1090 */ 1091 PetscErrorCode MatCreate_FFTW(Mat A) 1092 { 1093 PetscErrorCode ierr; 1094 MPI_Comm comm; 1095 Mat_FFT *fft=(Mat_FFT*)A->data; 1096 Mat_FFTW *fftw; 1097 PetscInt n =fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1098 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1099 PetscBool flg; 1100 PetscInt p_flag,partial_dim=1,ctr; 1101 PetscMPIInt size,rank; 1102 ptrdiff_t *pdim; 1103 ptrdiff_t 
local_n1,local_1_start; 1104 #if !defined(PETSC_USE_COMPLEX) 1105 ptrdiff_t temp; 1106 PetscInt N1,tot_dim; 1107 #else 1108 PetscInt n1; 1109 #endif 1110 1111 PetscFunctionBegin; 1112 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1113 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1114 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1115 1116 fftw_mpi_init(); 1117 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1118 pdim[0] = dim[0]; 1119 #if !defined(PETSC_USE_COMPLEX) 1120 tot_dim = 2*dim[0]; 1121 #endif 1122 for (ctr=1; ctr<ndim; ctr++) { 1123 partial_dim *= dim[ctr]; 1124 pdim[ctr] = dim[ctr]; 1125 #if !defined(PETSC_USE_COMPLEX) 1126 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1127 else tot_dim *= dim[ctr]; 1128 #endif 1129 } 1130 1131 if (size == 1) { 1132 #if defined(PETSC_USE_COMPLEX) 1133 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1134 n = N; 1135 #else 1136 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1137 n = tot_dim; 1138 #endif 1139 1140 } else { 1141 ptrdiff_t alloc_local,local_n0,local_0_start; 1142 switch (ndim) { 1143 case 1: 1144 #if !defined(PETSC_USE_COMPLEX) 1145 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1146 #else 1147 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1148 1149 n = (PetscInt)local_n0; 1150 n1 = (PetscInt)local_n1; 1151 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1152 #endif 1153 break; 1154 case 2: 1155 #if defined(PETSC_USE_COMPLEX) 1156 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1157 /* 1158 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1159 PetscSynchronizedFlush(comm); 1160 */ 1161 n = (PetscInt)local_n0*dim[1]; 1162 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1163 #else 1164 alloc_local = 
fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1165 1166 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1167 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1168 #endif 1169 break; 1170 case 3: 1171 #if defined(PETSC_USE_COMPLEX) 1172 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1173 1174 n = (PetscInt)local_n0*dim[1]*dim[2]; 1175 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1176 #else 1177 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1178 1179 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1180 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1181 #endif 1182 break; 1183 default: 1184 #if defined(PETSC_USE_COMPLEX) 1185 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1186 1187 n = (PetscInt)local_n0*partial_dim; 1188 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1189 #else 1190 temp = pdim[ndim-1]; 1191 1192 pdim[ndim-1] = temp/2 + 1; 1193 1194 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1195 1196 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1197 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1198 1199 pdim[ndim-1] = temp; 1200 1201 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1202 #endif 1203 break; 1204 } 1205 } 1206 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1207 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1208 fft->data = (void*)fftw; 1209 1210 fft->n = n; 1211 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1212 fftw->partial_dim = partial_dim; 1213 1214 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t*)&(fftw->dim_fftw));CHKERRQ(ierr); 1215 1216 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1217 1218 fftw->p_forward = 0; 1219 
fftw->p_backward = 0; 1220 fftw->p_flag = FFTW_ESTIMATE; 1221 1222 if (size == 1) { 1223 A->ops->mult = MatMult_SeqFFTW; 1224 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1225 } else { 1226 A->ops->mult = MatMult_MPIFFTW; 1227 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1228 } 1229 fft->matdestroy = MatDestroy_FFTW; 1230 A->assembled = PETSC_TRUE; 1231 A->preallocated = PETSC_TRUE; 1232 1233 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1236 1237 /* get runtime options */ 1238 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1239 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1240 if (flg) fftw->p_flag = (unsigned)p_flag; 1241 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 EXTERN_C_END 1245 1246 1247 1248 1249