Skip to content

Commit 2ac6ca2

Browse files
authored
Merge pull request #87 from PrometheusPi/makeFFTmoreReadable
Use FFTW to speed up fourier transform
2 parents 70ef420 + 2d6eb7d commit 2ac6ca2

File tree

7 files changed

+253
-10
lines changed

7 files changed

+253
-10
lines changed

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,12 @@ Software License
3030
*Clara2* is licensed under the **GPLv3+**. You can use any of our *libraries* with
3131
**GPLv3+ or LGPLv3+** (they are *dual-licensed*).
3232
Please refer to our [LICENSE](LICENSE)
33+
34+
35+
Dependency
36+
----------
37+
38+
*Clara2* uses the FFTW library when used with the (faster) fft detector.
39+
If you install *Clara2*, you need to install FFTW as well. You can finde
40+
compiled code at: http://www.fftw.org/ or the source code at
41+
https://github.com/FFTW/fftw3. FFTW3 is under GNU General Public License v2.0.

REFERENCE.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,11 @@ Available online 13 November 2013, ISSN 0168-9002,
1010
http://dx.doi.org/10.1016/j.nima.2013.10.073.
1111

1212

13+
14+
R. Pausch
15+
16+
Electromagnetic Radiation from Relativistic Electrons as Characteristic Signature of their Dynamics
17+
18+
Diploma Thesis 2012, Technische Universität Dresden, Germany
19+
20+
DOI: 10.5281/zenodo.843510

src/clara2_hypnos.modules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,6 @@ module purge
33
module load gcc/4.6.2
44
module load infiniband/1.0.0
55
module load openmpi/1.6.0
6+
module load fftw/3.3.4
7+
module load editor/emacs
8+

src/include/detector_fft.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
#include "../settings.hpp"
2525
#include "physics_units.hpp"
2626
#include "utilities.hpp"
27-
#include "fft_ned.hpp"
27+
#include "ned_fft.hpp"
2828

2929

3030
// Constructor and Destructor:

src/include/makefile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121

2222
CC = g++
2323
CFLAGS = -Wall -O3 -c
24-
24+
# compile against fftw library
25+
# http://www.fftw.org/ - an open source FFT library under GPL 2.0 license
26+
CFFT = -lfftw3 -lm
2527

2628
all: libDetector.a fileExists.o
2729

@@ -39,8 +41,8 @@ detector_dft.o: detector_dft.cpp detector_dft.hpp vector.hpp utilities.hpp
3941
$(CC) $(CFLAGS) detector_dft.cpp
4042

4143
# Fast Fourier Transformation detectors:
42-
detector_fft.o: detector_fft.cpp detector_fft.hpp vector.hpp utilities.hpp fft_ned.hpp ../settings.hpp
43-
$(CC) $(CFLAGS) detector_fft.cpp
44+
detector_fft.o: detector_fft.cpp detector_fft.hpp vector.hpp utilities.hpp ned_fft.hpp ../settings.hpp
45+
$(CC) $(CFLAGS) $(CFFT) detector_fft.cpp
4446

4547
fileExists.o: fileExists.hpp fileExists.cpp
4648
$(CC) $(CFLAGS) fileExists.cpp

src/include/ned_fft.hpp

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
/**
2+
* Copyright 2014-2017 Richard Pausch
3+
*
4+
* This file is part of Clara 2.
5+
*
6+
* Clara 2 is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU General Public License as published by
8+
* the Free Software Foundation, either version 3 of the License, or
9+
* (at your option) any later version.
10+
*
11+
* Clara 2 is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU General Public License
17+
* along with Clara 2.
18+
* If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
21+
#include <fftw3.h>
22+
#pragma once
23+
24+
25+
26+
inline unsigned power_of_two(unsigned N)
27+
{
28+
unsigned exponent=1;
29+
for(; N > (1u<<exponent); ++exponent) {}
30+
return (1u<<exponent);
31+
}
32+
33+
34+
// non equal distant FFT
35+
template< typename A, typename T > // A...time, T...data
36+
class ned_FFT
37+
{
38+
39+
public:
40+
// constructor
41+
ned_FFT(unsigned N,
42+
A x_original[],
43+
T y_original[])
44+
: N_data(N),
45+
x_equi(0),
46+
y_equi(0),
47+
data_complex(0),
48+
spektrum(0),
49+
omega(0)
50+
{
51+
x_equi = new A[N];
52+
y_equi = new T[N];
53+
54+
delta_t = interpolation_equi(x_original, y_original, N_data,
55+
x_equi, y_equi, N_data);
56+
57+
unsigned exponent=1;
58+
for(; N_data > (1u<<exponent); ++exponent) {}
59+
N_bin = 1u<<exponent;
60+
61+
data_complex = new T[N_bin<<1];
62+
for(unsigned i=0; i<N_data; ++i)
63+
{
64+
data_complex[2*i] = y_equi[i];
65+
data_complex[2*i+1] = T(0.);
66+
}
67+
68+
for(unsigned i=2*N; i<(N_bin<<1); ++i)
69+
data_complex[i] = T(0.);
70+
71+
fft(data_complex, N_data);
72+
73+
omega_calc();
74+
spektrum_calc();
75+
}
76+
77+
// destructor
78+
~ned_FFT()
79+
{
80+
delete[] x_equi;
81+
delete[] y_equi;
82+
delete[] data_complex;
83+
84+
delete[] spektrum;
85+
delete[] omega;
86+
}
87+
88+
private:
89+
90+
void fft(T* data,
91+
unsigned long N)
92+
{
93+
// this uses the awesome fftw3 library
94+
// see http://www.fftw.org/
95+
96+
// transfer data to fftw3 own data structure
97+
// TODO we could avoid using this data transfer
98+
// by using fftw data types right away
99+
// for now not, to keep fft independent to allow later
100+
// use of liFFT
101+
// https://github.com/ComputationalRadiationPhysics/liFFT
102+
fftw_complex *in, *out;
103+
input = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
104+
output = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
105+
106+
// default fft complex to complex
107+
// TODO since input is real, there is a faster real implementation
108+
// without Nyquist reflections
109+
fftw_plan plan = fftw_plan_dft_1d(N,
110+
input,
111+
output,
112+
FFTW_FORWARD,
113+
FFTW_ESTIMATE);
114+
115+
// here we have to assume that the data is a vector type of 3 dimensions
116+
// TODO this is not necessarily the case
117+
// and this breaks the template structure of the rest of the code
118+
for(unsigned int index_vec = 0; index_vec < 3; index_vec++)
119+
{
120+
for (unsigned int i=0; i<N; i++)
121+
{
122+
input[i][0] = data[2*i][index_vec]; // real input (signal)
123+
input[i][1] = data[2*i+1][index_vec]; // imaginary input (signal)
124+
}
125+
126+
fftw_execute(plan); // run FFT
127+
128+
// copy data back into original data array
129+
for(unsigned int i=0; i<N; i++)
130+
{
131+
data[2*i][index_vec] = output[i][0]; // real output (spectrum)
132+
data[2*i+1][index_vec] = output[i][1]; // imaginary output (spectrum)
133+
}
134+
}
135+
// free memory for fftw in-between data
136+
fftw_destroy_plan(plan); // could be freed before data transfer
137+
fftw_free(input); fftw_free(output);
138+
}
139+
140+
A interpolation_equi(A x_0[],
141+
T y_0[],
142+
unsigned N_0,
143+
A x_1[],
144+
T y_1[],
145+
unsigned N_1)
146+
{
147+
for (unsigned i=1; i < N_0; ++i)
148+
{
149+
if(x_0[i-1] > x_0[i])
150+
{
151+
std::cerr << "error 01: interpolation inverted (ned_fft.hpp) "
152+
<< i << " --> " << x_0[i-1]
153+
<< " <=! " << x_0[i] << "\n";
154+
}
155+
}
156+
157+
const A min = x_0[0];
158+
const A max = x_0[N_0-1];
159+
160+
// creating equidistant x_values
161+
for (unsigned i=0; i < N_1; ++i)
162+
x_1[i] = min + (max-min)/(N_1) * i;
163+
164+
// calculating y_values
165+
unsigned j=0;
166+
for (unsigned i=0; i<N_1; ++i)
167+
{
168+
for(; !(x_0[j] <= x_1[i] && x_1[i] < x_0[j+1]); ++j) {}
169+
170+
if (!(x_0[j] <= x_1[i] && x_1[i] < x_0[j+1]))
171+
std::cerr << "error 02: (ned_fft.hpp)" << std::endl;
172+
173+
y_1[i] = y_0[j] + (y_0[j+1] - y_0[j])*((x_1[i]-x_0[j])/(x_0[j+1]-x_0[j]));
174+
}
175+
return (max-min)/N_1;
176+
}
177+
178+
179+
// calculate angular frequency
180+
void omega_calc()
181+
{
182+
omega = new A[N_bin];
183+
for (unsigned i=0; i<N_bin; ++i)
184+
omega[i] = (2*M_PI*i)/(N_bin*delta_t);
185+
}
186+
187+
// calculate spectrum (T-->A)
188+
void spektrum_calc()
189+
{
190+
spektrum = new A[N_bin];
191+
for (unsigned i=0; i<N_bin; ++i)
192+
spektrum[i] = std::sqrt(data_complex[2*i]*data_complex[2*i] +
193+
data_complex[2*i+1]*data_complex[2*i+1]);
194+
}
195+
196+
197+
public:
198+
unsigned N_data;
199+
unsigned N_bin;
200+
A delta_t;
201+
A* x_equi;
202+
T* y_equi;
203+
T* data_complex;
204+
A* spektrum;
205+
A* omega;
206+
207+
};
208+
209+
210+
211+
// usage:
212+
// for (unsigned i = 0; i< N_data; ++i)
213+
// {
214+
// x_data[i] = random_double(i, 0.0)*0.2 - 5.003;
215+
// y_data[i] = fkt(x_data[i]);
216+
// }
217+
//
218+
// ned_FFT<double> spektrum(N_data, x_data, y_data);

src/makefile

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222

2323
CC = g++
2424
CFLAGS = -Wall -O3
25+
# compile against fftw library
26+
# http://www.fftw.org/ - an open source FFT library under GPL 2.0 license
27+
CFFT = -lfftw3 -lm
2528
COBJ = -c
2629
OMP = -fopenmp
2730
ZIP = -lz
@@ -45,28 +48,28 @@ subsystem:
4548

4649

4750
MPI: MPI_main all_directions.o ./include/libDetector.a single_direction.o
48-
$(MPICC) $(CFLAGS) $(OMP) $(ZIP) main.o all_directions.o single_direction.o ./include/libDetector.a ./include/fileExists.o -o executable
51+
$(MPICC) $(CFLAGS) $(CFFT) $(OMP) $(ZIP) main.o all_directions.o single_direction.o ./include/libDetector.a ./include/fileExists.o -o executable
4952

5053
ARRAY: ARRAY_main all_directions.o ./include/libDetector.a single_direction.o
51-
$(CC) $(CFLAGS) $(OMP) $(ZIP) main.o all_directions.o single_direction.o ./include/libDetector.a ./include/fileExists.o -o executable
54+
$(CC) $(CFLAGS) $(CFFT) $(OMP) $(ZIP) main.o all_directions.o single_direction.o ./include/libDetector.a ./include/fileExists.o -o executable
5255

5356

5457
MPI_main: main.cpp all_directions.hpp
55-
$(MPICC) $(CFLAGS) $(MPIFLAG) $(COBJ) $(OMP) $(ZIP) main.cpp
58+
$(MPICC) $(CFLAGS) $(CFFT) $(MPIFLAG) $(COBJ) $(OMP) $(ZIP) main.cpp
5659

5760
ARRAY_main: main.cpp all_directions.hpp
58-
$(CC) $(CFLAGS) $(ARRAYFLAG) $(COBJ) $(OMP) $(ZIP) main.cpp
61+
$(CC) $(CFLAGS) $(CFFT) $(ARRAYFLAG) $(COBJ) $(OMP) $(ZIP) main.cpp
5962

6063
# main routine:
6164
single_direction.o: single_direction.hpp single_direction.cpp ./include/detector_e_field.hpp ./include/detector_dft.hpp \
6265
./include/detector_fft.hpp ./include/vector.hpp ./include/import_from_file.hpp ./include/discrete.hpp \
6366
./run_through_data.hpp ./include/load_txt.hpp ./include/interpolation.hpp \
6467
./include/interpolation.tpp ./include/fileExists.hpp ./include/fileExists.cpp
65-
$(CC) $(CFLAGS) $(COBJ) $(CFLAGFORTRAN) $(OMP) -I./include/ single_direction.cpp
68+
$(CC) $(CFLAGS) $(CFFT) $(COBJ) $(CFLAGFORTRAN) $(OMP) -I./include/ single_direction.cpp
6669

6770
all_directions.o: all_directions.cpp all_directions.hpp single_direction.hpp ./include/vector.hpp settings.hpp \
6871
setFilename.hpp ./include/input_output.hpp
69-
$(CC) $(CFLAGS) $(COBJ) $(OMP) $(ZIP) -I./include/ all_directions.cpp
72+
$(CC) $(CFLAGS) $(CFFT) $(COBJ) $(OMP) $(ZIP) -I./include/ all_directions.cpp
7073

7174

7275
# How do I include the detector.hpp <-- vector.hpp dependency

0 commit comments

Comments
 (0)