1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 28 extern PetscErrorCode MatGetVecsFFT_FFTW(Mat,Vec*,Vec*,Vec*); 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMult_SeqFFTW" 32 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 33 { 34 PetscErrorCode ierr; 35 Mat_FFT *fft = (Mat_FFT*)A->data; 36 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 37 PetscScalar *x_array,*y_array; 38 PetscInt ndim=fft->ndim,*dim=fft->dim; 39 40 PetscFunctionBegin; 41 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 42 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 43 if (!fftw->p_forward){ /* create a plan, then excute it */ 44 switch (ndim){ 45 case 1: 46 #if defined(PETSC_USE_COMPLEX) 47 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 48 #else 49 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 50 #endif 51 break; 52 case 2: 53 #if defined(PETSC_USE_COMPLEX) 54 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 55 #else 56 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 57 #endif 58 break; 59 case 3: 60 #if defined(PETSC_USE_COMPLEX) 61 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 62 #else 63 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 64 #endif 65 break; 66 default: 67 #if defined(PETSC_USE_COMPLEX) 68 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 69 #else 70 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 71 #endif 72 break; 73 } 74 fftw->finarray = x_array; 75 fftw->foutarray = y_array; 76 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 77 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 78 fftw_execute(fftw->p_forward); 79 } else { /* use existing plan */ 80 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 81 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 82 } else { 83 fftw_execute(fftw->p_forward); 84 } 85 } 86 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 87 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 93 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 94 { 95 PetscErrorCode ierr; 96 Mat_FFT *fft = (Mat_FFT*)A->data; 97 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 98 PetscScalar *x_array,*y_array; 99 PetscInt ndim=fft->ndim,*dim=fft->dim; 100 101 PetscFunctionBegin; 102 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 103 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 104 if (!fftw->p_backward){ /* create a plan, then excute it */ 105 switch (ndim){ 106 case 1: 107 #if defined(PETSC_USE_COMPLEX) 108 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 109 #else 110 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 111 #endif 112 break; 113 case 2: 114 #if defined(PETSC_USE_COMPLEX) 115 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 116 #else 117 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 118 #endif 119 break; 120 case 3: 121 #if defined(PETSC_USE_COMPLEX) 122 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 123 #else 124 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 125 #endif 126 break; 127 default: 128 #if defined(PETSC_USE_COMPLEX) 129 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 130 #else 131 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 132 #endif 133 break; 134 } 135 fftw->binarray = x_array; 136 fftw->boutarray = y_array; 137 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 138 } else { /* use existing plan */ 139 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 140 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 141 } else { 142 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 143 } 144 } 145 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 146 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 147 PetscFunctionReturn(0); 148 } 149 150 #undef __FUNCT__ 151 #define __FUNCT__ "MatMult_MPIFFTW" 152 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 153 { 154 PetscErrorCode ierr; 155 Mat_FFT *fft = (Mat_FFT*)A->data; 156 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 157 PetscScalar *x_array,*y_array; 158 PetscInt ndim=fft->ndim,*dim=fft->dim; 159 MPI_Comm comm=((PetscObject)A)->comm; 160 161 PetscFunctionBegin; 162 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 163 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 164 if (!fftw->p_forward){ /* create a plan, then excute it */ 165 switch (ndim){ 166 case 1: 167 #if defined(PETSC_USE_COMPLEX) 168 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 169 #else 170 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 171 #endif 172 break; 173 case 2: 174 #if defined(PETSC_USE_COMPLEX) 175 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 176 #else 177 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 178 #endif 179 break; 180 case 3: 181 #if defined(PETSC_USE_COMPLEX) 182 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 183 #else 184 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 185 #endif 186 break; 187 default: 188 #if defined(PETSC_USE_COMPLEX) 189 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 190 #else 191 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 192 #endif 193 break; 194 } 195 fftw->finarray = x_array; 196 fftw->foutarray = y_array; 197 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 198 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 199 fftw_execute(fftw->p_forward); 200 } else { /* use existing plan */ 201 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 202 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 203 } else { 204 fftw_execute(fftw->p_forward); 205 } 206 } 207 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 208 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 214 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 215 { 216 PetscErrorCode ierr; 217 Mat_FFT *fft = (Mat_FFT*)A->data; 218 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 219 PetscScalar *x_array,*y_array; 220 PetscInt ndim=fft->ndim,*dim=fft->dim; 221 MPI_Comm comm=((PetscObject)A)->comm; 222 223 PetscFunctionBegin; 224 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 225 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 226 if (!fftw->p_backward){ /* create a plan, then excute it */ 227 switch (ndim){ 228 case 1: 229 #if defined(PETSC_USE_COMPLEX) 230 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 231 #else 232 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 233 #endif 234 break; 235 case 2: 236 #if defined(PETSC_USE_COMPLEX) 237 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 238 #else 239 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 240 #endif 241 break; 242 case 3: 243 #if defined(PETSC_USE_COMPLEX) 244 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 245 #else 246 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 247 #endif 248 break; 249 default: 250 #if defined(PETSC_USE_COMPLEX) 251 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 252 #else 253 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 254 #endif 255 break; 256 } 257 fftw->binarray = x_array; 258 fftw->boutarray = y_array; 259 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 260 } else { /* use existing plan */ 261 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 262 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 263 } else { 264 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 265 } 266 } 267 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 268 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatDestroy_FFTW" 274 PetscErrorCode MatDestroy_FFTW(Mat A) 275 { 276 Mat_FFT *fft = (Mat_FFT*)A->data; 277 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 fftw_destroy_plan(fftw->p_forward); 282 fftw_destroy_plan(fftw->p_backward); 283 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 284 ierr = PetscFree(fft->data);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 289 #undef __FUNCT__ 290 #define __FUNCT__ "VecDestroy_MPIFFTW" 291 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 292 { 293 PetscErrorCode ierr; 294 PetscScalar *array; 295 296 PetscFunctionBegin; 297 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 298 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 299 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 300 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 301 PetscFunctionReturn(0); 302 } 303 304 #undef __FUNCT__ 305 #define __FUNCT__ "MatGetVecs1DC_FFTW" 306 /* 307 MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 308 parallel layout determined by FFTW-1D 309 310 */ 311 PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout) 312 { 313 PetscErrorCode ierr; 314 PetscMPIInt size,rank; 315 MPI_Comm comm=((PetscObject)A)->comm; 316 Mat_FFT *fft = (Mat_FFT*)A->data; 317 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 318 PetscInt N=fft->N; 319 PetscInt ndim=fft->ndim,*dim=fft->dim; 320 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 321 ptrdiff_t f_local_n1,f_local_1_end; 322 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 323 ptrdiff_t b_local_n1,b_local_1_end; 324 fftw_complex *data_fin,*data_fout,*data_bout; 325 326 PetscFunctionBegin; 327 #if !defined(PETSC_USE_COMPLEX) 328 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 329 #endif 330 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 331 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 332 if (size == 1){ 333 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 334 } 335 else { 336 if (ndim>1){ 337 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 338 else { 339 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 340 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 341 if (fin) { 342 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 343 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 344 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 345 } 346 if (fout) { 347 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 348 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 349 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 350 } 351 if (bout) { 352 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 353 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 354 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 355 } 356 } 357 if (fin){ 358 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 359 } 360 if (fout){ 361 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 362 } 363 if (bout){ 364 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 365 } 366 PetscFunctionReturn(0); 367 } 368 369 370 } 371 372 373 #undef __FUNCT__ 374 #define __FUNCT__ "MatGetVecsFFT" 375 PetscErrorCode MatGetVecsFFT(Mat A,Vec *x,Vec *y,Vec *z) 376 { 377 PetscErrorCode ierr; 378 PetscFunctionBegin; 379 ierr = PetscTryMethod(A,"MatGetVecsFFT_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 380 PetscFunctionReturn(0); 381 } 382 383 #undef __FUNCT__ 384 #define __FUNCT__ "MatGetVecsFFT_FFTW" 385 /* 386 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 387 parallel layout determined by FFTW 388 389 Collective on Mat 390 391 Input Parameter: 392 . mat - the matrix 393 394 Output Parameter: 395 + fin - (optional) input vector of forward FFTW 396 - fout - (optional) output vector of forward FFTW 397 398 Level: advanced 399 400 .seealso: MatCreateFFTW() 401 */ 402 EXTERN_C_BEGIN 403 PetscErrorCode MatGetVecsFFT_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 404 { 405 PetscErrorCode ierr; 406 PetscMPIInt size,rank; 407 MPI_Comm comm=((PetscObject)A)->comm; 408 Mat_FFT *fft = (Mat_FFT*)A->data; 409 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 410 PetscInt N=fft->N, N1, n1,vsize; 411 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 412 413 PetscFunctionBegin; 414 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 415 PetscValidType(A,1); 416 417 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 418 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 419 printf("size is %d\n",size); 420 if (size == 1){ /* sequential case */ 421 printf("Routine is getting called\n"); 422 #if defined(PETSC_USE_COMPLEX) 423 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 424 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 425 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 426 #else 427 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 428 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 429 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 430 #endif 431 } else { 432 ptrdiff_t alloc_local,local_n0,local_0_start; 433 ptrdiff_t local_n1,local_1_end; 434 fftw_complex *data_fin,*data_fout,*data_bout; 435 double *data_finr,*data_boutr ; 436 ptrdiff_t local_1_start,temp; 437 switch (ndim){ 438 case 1: 439 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 440 case 2: 441 #if !defined(PETSC_USE_COMPLEX) 442 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 443 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 444 if (fin) { 445 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 446 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 447 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 448 //printf("The code comes here with vector size %d\n",vsize); 449 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 450 } 451 if (fout) { 452 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 453 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 454 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 455 } 456 if (bout) { 457 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 458 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 459 ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 460 //printf("The code comes here with vector size %d\n",vsize); 461 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 462 } 463 464 //printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 465 466 #else 467 /* Get local size */ 468 printf("Code works for paralllel 2d complex DFT\n"); 469 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 470 if (fin) { 471 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 472 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 473 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 474 } 475 if (fout) { 476 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 477 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 478 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 479 } 480 if (bout) { 481 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 482 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 483 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 484 } 485 486 //printf("Hope this does not come here"); 487 #endif 488 break; 489 490 case 3: 491 #if !defined(PETSC_USE_COMPLEX) 492 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 493 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 494 if (fin) { 495 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 496 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 497 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 498 //printf("The code comes here with vector size %d\n",vsize); 499 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 500 } 501 if (fout) { 502 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 503 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 504 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 505 } 506 if (bout) { 507 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 508 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 509 // ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 510 //printf("The code comes here with vector size %d\n",vsize); 511 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 512 } 513 514 //printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 515 516 #else 517 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 518 // printf("The quantity n is %d",n); 519 if (fin) { 520 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 521 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 522 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 523 } 524 if (fout) { 525 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 526 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 527 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 528 } 529 if (bout) { 530 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 531 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 532 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 533 } 534 535 #endif 536 break; 537 default: 538 #if !defined(PETSC_USE_COMPLEX) 539 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 540 printf("The value of temp is %ld\n",temp); 541 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 542 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 543 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 544 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 545 546 if (fin) { 547 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 548 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 549 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 550 //printf("The code comes here with vector size %d\n",vsize); 551 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 if (fout) { 554 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 555 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 556 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 557 } 558 if (bout) { 559 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 560 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 561 //ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 562 //printf("The code comes here with vector size %d\n",vsize); 563 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 564 } 565 566 #else 567 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 568 if (fin) { 569 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 570 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 571 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 572 } 573 if (fout) { 574 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 575 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 576 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 577 } 578 if (bout) { 579 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 580 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 581 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 582 } 583 584 585 586 #endif 587 break; 588 } 589 } 590 PetscFunctionReturn(0); 591 } 592 EXTERN_C_END 593 594 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatGetVecs_FFTW" 598 /* 599 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 600 parallel layout determined by FFTW 601 602 Collective on Mat 603 604 Input Parameter: 605 . mat - the matrix 606 607 Output Parameter: 608 + fin - (optional) input vector of forward FFTW 609 - fout - (optional) output vector of forward FFTW 610 611 Level: advanced 612 613 .seealso: MatCreateFFTW() 614 */ 615 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 616 { 617 PetscErrorCode ierr; 618 PetscMPIInt size,rank; 619 MPI_Comm comm=((PetscObject)A)->comm; 620 Mat_FFT *fft = (Mat_FFT*)A->data; 621 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 622 PetscInt N=fft->N, N1, n1,vsize; 623 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 624 625 PetscFunctionBegin; 626 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 627 PetscValidType(A,1); 628 629 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 630 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 631 if (size == 1){ /* sequential case */ 632 #if defined(PETSC_USE_COMPLEX) 633 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 634 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 635 #else 636 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 637 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 638 #endif 639 } else { /* mpi case */ 640 ptrdiff_t alloc_local,local_n0,local_0_start; 641 ptrdiff_t local_n1,local_1_end; 642 fftw_complex *data_fin,*data_fout; 643 double *data_finr ; 644 ptrdiff_t local_1_start,temp; 645 // PetscInt ctr; 646 // ptrdiff_t ndim1,*pdim; 647 // ndim1=(ptrdiff_t) ndim; 648 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 649 650 // for(ctr=0;ctr<ndim;ctr++) 651 // {k 652 // pdim[ctr] = dim[ctr]; 653 // } 654 655 656 657 switch (ndim){ 658 case 1: 659 /* Get local size */ 660 /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 661 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 662 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 663 if (fin) { 664 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 665 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 666 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 667 } 668 if (fout) { 669 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 670 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 671 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 672 } 673 break; 674 case 2: 675 #if !defined(PETSC_USE_COMPLEX) 676 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 677 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 678 if (fin) { 679 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 680 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 681 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 682 //printf("The code comes here with vector size %d\n",vsize); 683 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 684 } 685 if (fout) { 686 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 687 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 688 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 689 } 690 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 691 692 #else 693 /* Get local size */ 694 //printf("Hope this does not come here"); 695 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 696 if (fin) { 697 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 698 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 699 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 700 } 701 if (fout) { 702 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 703 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 704 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 705 } 706 //printf("Hope this does not come here"); 707 #endif 708 break; 709 case 3: 710 /* Get local size */ 711 #if !defined(PETSC_USE_COMPLEX) 712 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 713 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 714 if (fin) { 715 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 716 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 717 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 718 //printf("The code comes here with vector size %d\n",vsize); 719 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 720 } 721 if (fout) { 722 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 723 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 724 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 725 } 726 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 727 728 729 #else 730 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 731 // printf("The quantity n is %d",n); 732 if (fin) { 733 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 734 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 735 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 736 } 737 if (fout) { 738 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 739 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 740 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 741 } 742 #endif 743 break; 744 default: 745 /* Get local size */ 746 #if !defined(PETSC_USE_COMPLEX) 747 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 748 printf("The value of temp is %ld\n",temp); 749 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 750 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 751 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 752 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 753 if (fin) { 754 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 755 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 756 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 757 //printf("The code comes here with vector size %d\n",vsize); 758 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 759 } 760 if (fout) { 761 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 762 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 763 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 764 } 765 766 #else 767 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 768 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 769 // printf("The value of alloc local is %d",alloc_local); 770 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 771 // for(i=0;i<ndim;i++) 772 // { 773 // pdim[i]=dim[i];printf("%d",pdim[i]); 774 // } 775 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 776 // printf("The quantity n is %d",n); 777 if (fin) { 778 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 779 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 780 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 781 } 782 if (fout) { 783 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 784 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 785 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 786 } 787 #endif 788 break; 789 } 790 } 791 // if (fin){ 792 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 793 // } 794 // if (fout){ 795 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 796 // } 797 PetscFunctionReturn(0); 798 } 799 800 #undef __FUNCT__ 801 #define __FUNCT__ "InputTransformFFT" 802 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 803 { 804 PetscErrorCode ierr; 805 PetscFunctionBegin; 806 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 807 PetscFunctionReturn(0); 808 } 809 810 /* 811 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 812 Input A, x, y 813 A - FFTW matrix 814 x - user data 815 Options Database Keys: 816 + -mat_fftw_plannerflags - set FFTW planner flags 817 818 Level: intermediate 819 820 */ 821 822 EXTERN_C_BEGIN 823 #undef __FUNCT__ 824 #define __FUNCT__ "InputTransformFFT_FTTW" 825 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 826 { 827 PetscErrorCode ierr; 828 MPI_Comm comm=((PetscObject)A)->comm; 829 Mat_FFT *fft = (Mat_FFT*)A->data; 830 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 831 PetscInt N=fft->N, N1, n1 ,NM; 832 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 833 PetscInt low, *indx1, *indx2, tempindx, tempindx1; 834 PetscInt i,j,k,rank,size,partial_dim; 835 ptrdiff_t alloc_local,local_n0,local_0_start; 836 ptrdiff_t local_n1,local_1_start,temp; 837 VecScatter vecscat; 838 IS list1,list2; 839 840 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 841 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 842 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 843 printf("Local ownership starts at %d\n",low); 844 845 if (size==1) 846 { 847 /* switch (ndim){ 848 case 1: 849 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 850 for (i=0;i<dim[0];i++) 851 { 852 indx1[i] = i; 853 } 854 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 855 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 856 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 857 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 858 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 859 ierr = ISDestroy(&list1);CHKERRQ(ierr); 860 ierr = PetscFree(indx1);CHKERRQ(ierr); 861 break; 862 863 case 2: 864 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 865 for (i=0;i<dim[0];i++){ 866 for (j=0;j<dim[1];j++){ 867 indx1[i*dim[1]+j] = i*dim[1] + j; 868 } 869 } 870 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 871 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 872 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 875 ierr = ISDestroy(&list1);CHKERRQ(ierr); 876 ierr = PetscFree(indx1);CHKERRQ(ierr); 877 break; 878 case 3: 879 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 880 for (i=0;i<dim[0];i++){ 881 for (j=0;j<dim[1];j++){ 882 for (k=0;k<dim[2];k++){ 883 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 884 } 885 } 886 } 887 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 888 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 889 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 891 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 892 ierr = ISDestroy(&list1);CHKERRQ(ierr); 893 ierr = PetscFree(indx1);CHKERRQ(ierr); 894 break; 895 default: 896 */ 897 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1); 898 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 899 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 902 ierr = ISDestroy(&list1);CHKERRQ(ierr); 903 //ierr = ISDestroy(list1);CHKERRQ(ierr); 904 // break; 905 // } 906 } 907 908 else{ 909 910 switch (ndim){ 911 case 1: 912 #if defined(PETSC_USE_COMPLEX) 913 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 914 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 915 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 916 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 917 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 918 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 919 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 920 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 922 ierr = ISDestroy(&list1);CHKERRQ(ierr); 923 ierr = ISDestroy(&list2);CHKERRQ(ierr); 924 break; 925 #else 926 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 927 #endif 928 break; 929 case 2: 930 #if defined(PETSC_USE_COMPLEX) 931 //PetscInt my_value; 932 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 933 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 934 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 935 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 936 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 937 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 938 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 939 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 941 ierr = ISDestroy(&list1);CHKERRQ(ierr); 942 ierr = ISDestroy(&list2);CHKERRQ(ierr); 943 break; 944 #else 945 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 946 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 947 948 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 949 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 950 printf("Val local_0_start = %ld\n",local_0_start); 951 952 if (dim[1]%2==0) 953 NM = dim[1]+2; 954 else 955 NM = dim[1]+1; 956 957 for (i=0;i<local_n0;i++){ 958 for (j=0;j<dim[1];j++){ 959 tempindx = i*dim[1] + j; 960 tempindx1 = i*NM + j; 961 indx1[tempindx]=local_0_start*dim[1]+tempindx; 962 indx2[tempindx]=low+tempindx1; 963 // printf("Val tempindx1 = %d\n",tempindx1); 964 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 965 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 966 // printf("-------------------------\n",indx2[tempindx],rank); 967 } 968 } 969 970 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 971 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 972 973 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 974 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 977 ierr = ISDestroy(&list1);CHKERRQ(ierr); 978 ierr = ISDestroy(&list2);CHKERRQ(ierr); 979 ierr = PetscFree(indx1);CHKERRQ(ierr); 980 ierr = PetscFree(indx2);CHKERRQ(ierr); 981 break; 982 #endif 983 984 case 3: 985 #if defined(PETSC_USE_COMPLEX) 986 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 987 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 988 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 989 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 990 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 991 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 992 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 994 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 995 ierr = ISDestroy(&list1);CHKERRQ(ierr); 996 ierr = ISDestroy(&list2);CHKERRQ(ierr); 997 break; 998 #else 999 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1000 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1001 1002 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1003 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1004 printf("Val local_0_start = %ld\n",local_0_start); 1005 1006 if (dim[2]%2==0) 1007 NM = dim[2]+2; 1008 else 1009 NM = dim[2]+1; 1010 1011 for (i=0;i<local_n0;i++){ 1012 for (j=0;j<dim[1];j++){ 1013 for (k=0;k<dim[2];k++){ 1014 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1015 tempindx1 = i*dim[1]*NM + j*NM + k; 1016 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1017 indx2[tempindx]=low+tempindx1; 1018 } 1019 // printf("Val tempindx1 = %d\n",tempindx1); 1020 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1021 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1022 // printf("-------------------------\n",indx2[tempindx],rank); 1023 } 1024 } 1025 1026 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1027 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1028 1029 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1030 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1033 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1034 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1035 ierr = PetscFree(indx1);CHKERRQ(ierr); 1036 ierr = PetscFree(indx2);CHKERRQ(ierr); 1037 break; 1038 #endif 1039 1040 default: 1041 #if defined(PETSC_USE_COMPLEX) 1042 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1043 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1044 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1045 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1046 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1047 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1048 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1049 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1050 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1051 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1052 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1053 1054 1055 #else 1056 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1057 printf("The value of temp is %ld\n",temp); 1058 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1059 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1060 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1061 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1062 1063 partial_dim = fftw->partial_dim; 1064 printf("The value of partial dim is %d\n",partial_dim); 1065 1066 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1067 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1068 printf("Val local_0_start = %ld\n",local_0_start); 1069 1070 if (dim[ndim-1]%2==0) 1071 NM = 2; 1072 else 1073 NM = 1; 1074 1075 j = low; 1076 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1077 { 1078 indx1[i] = local_0_start*partial_dim + i; 1079 indx2[i] = j; 1080 //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1081 if (k%dim[ndim-1]==0) 1082 { j+=NM;} 1083 j++; 1084 } 1085 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1086 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1087 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1088 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1091 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1092 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1093 ierr = PetscFree(indx1);CHKERRQ(ierr); 1094 ierr = PetscFree(indx2);CHKERRQ(ierr); 1095 break; 1096 #endif 1097 } 1098 } 1099 1100 return 0; 1101 } 1102 EXTERN_C_END 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "OutputTransformFFT" 1106 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 1107 { 1108 PetscErrorCode ierr; 1109 PetscFunctionBegin; 1110 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 /* 1115 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 1116 Input A, x, y 1117 A - FFTW matrix 1118 x - FFTW vector 1119 y - PETSc vector that user can read 1120 Options Database Keys: 1121 + -mat_fftw_plannerflags - set FFTW planner flags 1122 1123 Level: intermediate 1124 1125 */ 1126 1127 EXTERN_C_BEGIN 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "OutputTransformFFT_FTTW" 1130 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 1131 { 1132 PetscErrorCode ierr; 1133 MPI_Comm comm=((PetscObject)A)->comm; 1134 Mat_FFT *fft = (Mat_FFT*)A->data; 1135 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 1136 PetscInt N=fft->N, N1, n1 ,NM; 1137 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 1138 PetscInt low, *indx1, *indx2, tempindx, tempindx1; 1139 PetscInt i,j,k,rank,size,partial_dim; 1140 ptrdiff_t alloc_local,local_n0,local_0_start; 1141 ptrdiff_t local_n1,local_1_start,temp; 1142 VecScatter vecscat; 1143 IS list1,list2; 1144 1145 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1146 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1147 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 1148 1149 if (size==1){ 1150 /* 1151 switch (ndim){ 1152 case 1: 1153 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1154 for (i=0;i<dim[0];i++) 1155 { 1156 indx1[i] = i; 1157 } 1158 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1159 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1160 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1161 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1162 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1163 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1164 ierr = PetscFree(indx1);CHKERRQ(ierr); 1165 break; 1166 1167 case 2: 1168 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1169 for (i=0;i<dim[0];i++){ 1170 for (j=0;j<dim[1];j++){ 1171 indx1[i*dim[1]+j] = i*dim[1] + j; 1172 } 1173 } 1174 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1175 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1176 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1178 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1179 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1180 ierr = PetscFree(indx1);CHKERRQ(ierr); 1181 break; 1182 case 3: 1183 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1184 for (i=0;i<dim[0];i++){ 1185 for (j=0;j<dim[1];j++){ 1186 for (k=0;k<dim[2];k++){ 1187 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1188 } 1189 } 1190 } 1191 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1192 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1193 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1194 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1195 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1196 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1197 ierr = PetscFree(indx1);CHKERRQ(ierr); 1198 break; 1199 default: 1200 */ 1201 ierr = ISCreateStride(comm,N,0,1,&list1); 1202 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1203 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1204 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1205 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1207 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1208 // break; 1209 // } 1210 } 1211 else{ 1212 1213 switch (ndim){ 1214 case 1: 1215 #if defined(PETSC_USE_COMPLEX) 1216 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1217 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 1218 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 1219 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1220 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1221 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1222 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1224 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1225 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1226 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1227 break; 1228 1229 #else 1230 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1231 #endif 1232 break; 1233 case 2: 1234 #if defined(PETSC_USE_COMPLEX) 1235 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1236 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1237 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1238 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1239 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1241 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1242 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1243 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1244 break; 1245 #else 1246 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1247 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 1248 1249 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1250 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1251 printf("Val local_0_start = %ld\n",local_0_start); 1252 1253 if (dim[1]%2==0) 1254 NM = dim[1]+2; 1255 else 1256 NM = dim[1]+1; 1257 1258 1259 1260 for (i=0;i<local_n0;i++){ 1261 for (j=0;j<dim[1];j++){ 1262 tempindx = i*dim[1] + j; 1263 tempindx1 = i*NM + j; 1264 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1265 indx2[tempindx]=low+tempindx1; 1266 // printf("Val tempindx1 = %d\n",tempindx1); 1267 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1268 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1269 // printf("-------------------------\n",indx2[tempindx],rank); 1270 } 1271 } 1272 1273 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1274 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1275 1276 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1277 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1279 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1280 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1281 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1282 ierr = PetscFree(indx1);CHKERRQ(ierr); 1283 ierr = PetscFree(indx2);CHKERRQ(ierr); 1284 break; 1285 #endif 1286 case 3: 1287 #if defined(PETSC_USE_COMPLEX) 1288 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1289 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1290 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1291 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1292 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1293 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1294 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1295 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1296 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1297 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1298 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1299 break; 1300 #else 1301 1302 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1303 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1304 1305 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1306 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1307 printf("Val local_0_start = %ld\n",local_0_start); 1308 1309 if (dim[2]%2==0) 1310 NM = dim[2]+2; 1311 else 1312 NM = dim[2]+1; 1313 1314 for (i=0;i<local_n0;i++){ 1315 for (j=0;j<dim[1];j++){ 1316 for (k=0;k<dim[2];k++){ 1317 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1318 tempindx1 = i*dim[1]*NM + j*NM + k; 1319 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1320 indx2[tempindx]=low+tempindx1; 1321 } 1322 // printf("Val tempindx1 = %d\n",tempindx1); 1323 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1324 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1325 // printf("-------------------------\n",indx2[tempindx],rank); 1326 } 1327 } 1328 1329 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1330 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1331 1332 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1333 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1334 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1335 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1336 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1337 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1338 ierr = PetscFree(indx1);CHKERRQ(ierr); 1339 ierr = PetscFree(indx2);CHKERRQ(ierr); 1340 break; 1341 #endif 1342 default: 1343 #if defined(PETSC_USE_COMPLEX) 1344 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1345 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1346 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1347 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1348 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1349 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1350 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1351 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1352 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1353 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1354 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1355 #else 1356 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1357 printf("The value of temp is %ld\n",temp); 1358 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1359 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1360 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1361 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1362 1363 partial_dim = fftw->partial_dim; 1364 printf("The value of partial dim is %d\n",partial_dim); 1365 1366 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1367 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1368 printf("Val local_0_start = %ld\n",local_0_start); 1369 1370 if (dim[ndim-1]%2==0) 1371 NM = 2; 1372 else 1373 NM = 1; 1374 1375 j = low; 1376 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1377 { 1378 indx1[i] = local_0_start*partial_dim + i; 1379 indx2[i] = j; 1380 //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1381 if (k%dim[ndim-1]==0) 1382 { j+=NM;} 1383 j++; 1384 } 1385 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1386 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1387 1388 //ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1389 1390 1391 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1392 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1393 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1394 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1395 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1396 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1397 ierr = PetscFree(indx1);CHKERRQ(ierr); 1398 ierr = PetscFree(indx2);CHKERRQ(ierr); 1399 1400 break; 1401 #endif 1402 } 1403 } 1404 return 0; 1405 } 1406 EXTERN_C_END 1407 1408 EXTERN_C_BEGIN 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatCreate_FFTW" 1411 /* 1412 MatCreate_FFTW - Creates a matrix object that provides FFT 1413 via the external package FFTW 1414 Options Database Keys: 1415 + -mat_fftw_plannerflags - set FFTW planner flags 1416 1417 Level: intermediate 1418 1419 */ 1420 1421 PetscErrorCode MatCreate_FFTW(Mat A) 1422 { 1423 PetscErrorCode ierr; 1424 MPI_Comm comm=((PetscObject)A)->comm; 1425 Mat_FFT *fft=(Mat_FFT*)A->data; 1426 Mat_FFTW *fftw; 1427 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1428 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1429 PetscBool flg; 1430 PetscInt p_flag,partial_dim=1,ctr,N1; 1431 PetscMPIInt size,rank; 1432 ptrdiff_t *pdim, temp; 1433 ptrdiff_t local_n1,local_1_start,local_1_end; 1434 1435 PetscFunctionBegin; 1436 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1437 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1438 ierr = MPI_Barrier(PETSC_COMM_WORLD); 1439 1440 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1441 pdim[0] = dim[0]; 1442 for(ctr=1;ctr<ndim;ctr++) 1443 { 1444 partial_dim *= dim[ctr]; 1445 pdim[ctr] = dim[ctr]; 1446 } 1447 1448 if (size == 1) { 1449 #if defined(PETSC_USE_COMPLEX) 1450 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1451 n = N; 1452 #else 1453 int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1454 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1455 n = tot_dim; 1456 #endif 1457 1458 } else { 1459 ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end; 1460 switch (ndim){ 1461 case 1: 1462 #if !defined(PETSC_USE_COMPLEX) 1463 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1464 #else 1465 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 1466 n = (PetscInt)local_n0; 1467 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1468 #endif 1469 // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 1470 break; 1471 case 2: 1472 #if defined(PETSC_USE_COMPLEX) 1473 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1474 /* 1475 PetscMPIInt rank; 1476 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1477 PetscSynchronizedFlush(comm); 1478 */ 1479 n = (PetscInt)local_n0*dim[1]; 1480 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1481 #else 1482 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1483 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1484 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1485 #endif 1486 break; 1487 case 3: 1488 // printf("The value of alloc local is %d",alloc_local); 1489 #if defined(PETSC_USE_COMPLEX) 1490 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1491 n = (PetscInt)local_n0*dim[1]*dim[2]; 1492 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1493 #else 1494 printf("The code comes here\n"); 1495 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1496 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1497 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1498 #endif 1499 break; 1500 default: 1501 #if defined(PETSC_USE_COMPLEX) 1502 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1503 // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 1504 // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 1505 n = (PetscInt)local_n0*partial_dim; 1506 // printf("New partial dimension is %d %d %d",n,N,ndim); 1507 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1508 #else 1509 temp = pdim[ndim-1]; 1510 pdim[ndim-1]= temp/2 + 1; 1511 printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 1512 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1513 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1514 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1515 pdim[ndim-1] = temp; 1516 printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 1517 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1518 #endif 1519 break; 1520 } 1521 } 1522 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1523 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1524 fft->data = (void*)fftw; 1525 1526 fft->n = n; 1527 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1528 fftw->partial_dim = partial_dim; 1529 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1530 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1531 1532 fftw->p_forward = 0; 1533 fftw->p_backward = 0; 1534 fftw->p_flag = FFTW_ESTIMATE; 1535 1536 if (size == 1){ 1537 A->ops->mult = MatMult_SeqFFTW; 1538 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1539 } else { 1540 A->ops->mult = MatMult_MPIFFTW; 1541 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1542 } 1543 fft->matdestroy = MatDestroy_FFTW; 1544 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 1545 A->ops->getvecs = MatGetVecs_FFTW; 1546 A->assembled = PETSC_TRUE; 1547 PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFT_C","MatGetVecsFFT_FFTW",MatGetVecsFFT_FFTW); 1548 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 1549 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1550 1551 /* get runtime options */ 1552 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1553 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1554 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1555 PetscOptionsEnd(); 1556 PetscFunctionReturn(0); 1557 } 1558 EXTERN_C_END 1559 1560 1561 1562 1563