|
73 | 73 | '-Wunused', '-Wunused-function', '-Wunused-label', |
74 | 74 | '-Wunused-parameter', '-Wunused-value', '-Wunused-variable', |
75 | 75 | '-Wvariadic-macros', '-Wvolatile-register-var', '-Wwrite-strings', |
76 | | - '-pipe', '-Ofast', '-s', '-std=c++17', '-fopenmp', |
| 76 | + '-pipe', '-Ofast', '-s', '-std=c++17', '-fopenmp', '-mmmx', |
77 | 77 | '-msse', '-msse2', '-msse3', '-msse4', '-msse4.1', '-msse4.2', |
78 | | - '-mavx', '-mavx2', '-mfpmath=sse', '-march=native', |
79 | | - '-funroll-loops', '-ffast-math' |
| 78 | + '-mavx', '-mavx2', '-mfma', '-mfpmath=sse', |
| 79 | + '-march=native', '-funroll-loops', '-ffast-math' |
80 | 80 | ] |
81 | 81 |
|
82 | 82 | if PLATFORM != 'Windows': |
|
91 | 91 | exe_build_args.remove('-Wvolatile-register-var') |
92 | 92 | exe_build_args.remove('-Weffc++') |
93 | 93 | exe_build_args.remove('-Ofast') |
| 94 | + exe_build_args.remove('-mmmx') |
94 | 95 | exe_build_args.remove('-msse') |
95 | 96 | exe_build_args.remove('-msse2') |
96 | 97 | exe_build_args.remove('-msse3') |
|
99 | 100 | exe_build_args.remove('-msse4.2') |
100 | 101 | exe_build_args.remove('-mavx') |
101 | 102 | exe_build_args.remove('-mavx2') |
| 103 | + exe_build_args.remove('-mfma') |
102 | 104 | exe_build_args.remove('-mfpmath=sse') |
103 | 105 | exe_build_args.remove('-s') |
104 | 106 |
|
|
133 | 135 | cuda_build_args.append('/std:c++17') |
134 | 136 |
|
135 | 137 | lib_build_args = lib_build_args + [ |
136 | | - '-pipe', '-Ofast', '-s', |
137 | | - '-std=c++17', '-fopenmp', |
138 | | - '-msse', '-msse2', '-msse3', |
| 138 | + '-pipe', '-Ofast', '-s', '-std=c++17', |
| 139 | + '-fopenmp', '-msse', '-msse2', '-msse3', |
139 | 140 | '-msse4', '-msse4.1', '-msse4.2', |
140 | | - '-mavx', '-mavx2', '-mfpmath=sse', |
141 | | - '-march=native' |
| 141 | + '-mavx', '-mavx2', '-mmmx', '-mfma', |
| 142 | + '-mfpmath=sse', '-march=native' |
142 | 143 | ] |
143 | 144 |
|
144 | 145 | cuda_build_args.append('-Iinclude') |
|
0 commit comments