Benchmark CI / ReverseMode benchmark patch.

Summary of what the hunks below change:
  * benchmark.yml: the LLVM matrix gains "15", the build matrix is
    Release-only, the runner label becomes openstack22 and libzstd-dev is
    added to the installed packages.
  * adbench/gmm.h: the 2.5k and 10k data sets are commented out, leaving 1k.
  * ReverseMode/{ba,fft,gmm,lstm,ode-real}/Makefile.make: the RUN lines pass
    PTR and ENZYME (plus LOADCLANG for ba, fft, gmm and lstm), the pass is
    selected with $(ENZYME) instead of a literal -enzyme, and the fft and
    ode-real targets write results.json instead of results.txt.
  * ReverseMode/hand/Makefile.make: the RUN line additionally passes ENZYME.
  * ReverseMode/taylorlog/Makefile.make: -aggressive-instcombine is dropped
    from the opt pipeline (the previous command is kept as a comment).
  * lit.site.cfg.py.in: %loadEnzyme expands to -load-pass-plugin style flags;
    %eopt, %O0TBAA, %enzyme, %ptr, %simplifycfg, %loopmssa and the Clang
    plugin substitutions are registered. %loadClangEnzyme stays registered
    because the Makefile RUN lines above still reference it.

NOTE(review): the post-image blob id on the lit.site.cfg.py.in "index" line
was not regenerated after that hunk was edited; refresh it with `git diff`
before relying on `git apply --3way`.
NOTE(review): indentation of context lines was reconstructed; confirm it
against the target tree before applying.

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index cf314e943b6a..aa76aa64cfd7 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -21,16 +21,16 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        llvm: ["16", "17", "18"]
-        build: ["Release", "Debug"] # "RelWithDebInfo"
-        os: [openstack18]
+        llvm: ["15", "16", "17", "18"]
+        build: ["Release"] #, "Debug" "RelWithDebInfo"
+        os: [openstack22]
     timeout-minutes: 120
     steps:
     - name: add llvm
       run: |
           wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
           sudo apt-add-repository "deb http://apt.llvm.org/`lsb_release -c | cut -f2`/ llvm-toolchain-`lsb_release -c | cut -f2`-${{ matrix.llvm }} main" || true
-          sudo apt-get install -y python3-pip autoconf cmake gcc g++ libtool gfortran libblas-dev llvm-${{ matrix.llvm }}-dev clang-${{ matrix.llvm }} libeigen3-dev libboost-dev
+          sudo apt-get install -y python3-pip autoconf cmake gcc g++ libtool gfortran libblas-dev llvm-${{ matrix.llvm }}-dev clang-${{ matrix.llvm }} libeigen3-dev libboost-dev libzstd-dev
           sudo python3 -m pip install lit pathlib
           sudo touch /usr/lib/llvm-${{ matrix.llvm }}/bin/yaml-bench
     - uses: actions/checkout@v4
diff --git a/enzyme/benchmarks/ReverseMode/adbench/gmm.h b/enzyme/benchmarks/ReverseMode/adbench/gmm.h
index 00f4302b9f99..24da48fc791b 100644
--- a/enzyme/benchmarks/ReverseMode/adbench/gmm.h
+++ b/enzyme/benchmarks/ReverseMode/adbench/gmm.h
@@ -168,8 +168,8 @@ int main(const int argc, const char* argv[]) {
     std::vector<std::string> paths;// = { "1k/gmm_d10_K100.txt" };
 
     getTests(paths, "data/1k", "1k/");
-    getTests(paths, "data/2.5k", "2.5k/");
-    getTests(paths, "data/10k", "10k/");
+    //getTests(paths, "data/2.5k", "2.5k/");
+    //getTests(paths, "data/10k", "10k/");
 
     std::ofstream jsonfile("results.json", std::ofstream::trunc);
     json test_results;
diff --git a/enzyme/benchmarks/ReverseMode/ba/Makefile.make b/enzyme/benchmarks/ReverseMode/ba/Makefile.make
index 6f0f2cc18242..b7f013dc4b57 100644
--- a/enzyme/benchmarks/ReverseMode/ba/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/ba/Makefile.make
@@ -1,23 +1,23 @@
-# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" make -B ba-unopt.ll ba-raw.ll results.json -f %s
+# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" PTR="%ptr" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" LOADCLANG="%loadClangEnzyme" ENZYME="%enzyme" make -B ba-raw.ll results.json -f %s
 
 .PHONY: clean
 
+dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
+
 clean:
 	rm -f *.ll *.o results.txt results.json
 
 %-unopt.ll: %.cpp
-	clang++ $(BENCH) $^ -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -Xclang -new-struct-path-tbaa -o $@ -S -emit-llvm
-	#clang++ $(BENCH) $^ -O1 -Xclang -disable-llvm-passes -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -Xclang -new-struct-path-tbaa -o $@ -S -emit-llvm
+	clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
 
 %-raw.ll: %-unopt.ll
-	opt $^ $(LOAD) -enzyme -o $@ -S
+	opt $^ $(LOAD) $(ENZYME) -o $@ -S
 
 %-opt.ll: %-raw.ll
 	opt $^ -o $@ -S
-	#opt $^ -O2 -o $@ -S
 
 ba.o: ba-opt.ll
-	clang++ -O2 $^ -o $@ $(BENCHLINK)
+	clang++ $(BENCH) -pthread -O2 $^ -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o $@ $(BENCHLINK) -lpthread -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
 
 results.json: ba.o
 	./$^
diff --git a/enzyme/benchmarks/ReverseMode/fft/Makefile.make b/enzyme/benchmarks/ReverseMode/fft/Makefile.make
index ffeddd5507df..17ea03aaa5ae 100644
--- a/enzyme/benchmarks/ReverseMode/fft/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/fft/Makefile.make
@@ -1,23 +1,24 @@
-# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" make -B fft-unopt.ll fft-raw.ll fft-opt.ll results.txt VERBOSE=1 -f %s
+# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" PTR="%ptr" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" LOADCLANG="%loadClangEnzyme" ENZYME="%enzyme" make -B fft-raw.ll results.json -f %s
 
 .PHONY: clean
 
+dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
+
 clean:
-	rm -f *.ll *.o results.txt
+	rm -f *.ll *.o results.txt results.json
 
 %-unopt.ll: %.cpp
-	clang++ $(BENCH) $^ -O2 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
-	#clang++ $(BENCH) $^ -O1 -Xclang -disable-llvm-passes -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
+	clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
 
 %-raw.ll: %-unopt.ll
-	opt $^ $(LOAD) -enzyme -o $@ -S
+	opt $^ $(LOAD) $(ENZYME) -o $@ -S
 
 %-opt.ll: %-raw.ll
 	opt $^ -o $@ -S
-	#opt $^ -O2 -o $@ -S
 
 fft.o: fft-opt.ll
-	clang++ -O2 $^ -o $@ $(BENCHLINK) -lm
+	clang++ $(BENCH) -pthread -O2 $^ -o $@ $(BENCHLINK) -lpthread -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
+	#clang++ $(LOAD) $(BENCH) fft.cpp -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o fft.o -lpthread $(BENCHLINK) -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
 
-results.txt: fft.o
+results.json: fft.o
 	./$^ 1048576 | tee $@
diff --git a/enzyme/benchmarks/ReverseMode/gmm/Makefile.make b/enzyme/benchmarks/ReverseMode/gmm/Makefile.make
index 5072679eeb0e..1e8e711da1ba 100644
--- a/enzyme/benchmarks/ReverseMode/gmm/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/gmm/Makefile.make
@@ -1,23 +1,24 @@
-# RUN: if [ %llvmver -ge 12 ] || [ %llvmver -le 9 ]; then cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" make -B gmm-unopt.ll gmm-raw.ll results.json -f %s; fi
+# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" PTR="%ptr" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" LOADCLANG="%loadClangEnzyme" ENZYME="%enzyme" make -B gmm-raw.ll results.json -f %s
 
 .PHONY: clean
 
+dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
+
 clean:
 	rm -f *.ll *.o results.txt results.json
 
 %-unopt.ll: %.cpp
-	clang++ $(BENCH) $^ -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
-	#clang++ $(BENCH) $^ -O1 -Xclang -disable-llvm-passes -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
+	clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
 
 %-raw.ll: %-unopt.ll
-	opt $^ $(LOAD) -enzyme -o $@ -S
+	opt $^ $(LOAD) $(ENZYME) -o $@ -S
 
 %-opt.ll: %-raw.ll
 	opt $^ -o $@ -S
-	#opt $^ -O2 -o $@ -S
 
 gmm.o: gmm-opt.ll
-	clang++ -O2 $^ -o $@ $(BENCHLINK) -lm
+	clang++ -pthread -O2 $^ -o $@ $(BENCHLINK) -lm
+	#clang++ $(LOADCLANG) $(BENCH) gmm.cpp -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o gmm.o -lpthread $(BENCHLINK) -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
 
 results.json: gmm.o
 	./$^
diff --git a/enzyme/benchmarks/ReverseMode/hand/Makefile.make b/enzyme/benchmarks/ReverseMode/hand/Makefile.make
index 09975ebea232..04bb3760d28b 100644
--- a/enzyme/benchmarks/ReverseMode/hand/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/hand/Makefile.make
@@ -1,4 +1,4 @@
-# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" make -B hand-raw.ll results.json -f %s
+# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" ENZYME="%enzyme" make -B hand-raw.ll results.json -f %s
 
 .PHONY: clean
 
diff --git a/enzyme/benchmarks/ReverseMode/lstm/Makefile.make b/enzyme/benchmarks/ReverseMode/lstm/Makefile.make
index 4323ac694a08..276c5df7b450 100644
--- a/enzyme/benchmarks/ReverseMode/lstm/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/lstm/Makefile.make
@@ -1,23 +1,23 @@
-# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" make -B lstm-raw.ll results.json -f %s
+# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" PTR="%ptr" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" LOADCLANG="%loadClangEnzyme" ENZYME="%enzyme" make -B lstm-raw.ll results.json -f %s
 
 .PHONY: clean
 
+dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
+
 clean:
-	rm -f *.ll *.o results.txt
+	rm -f *.ll *.o results.txt results.json
 
 %-unopt.ll: %.cpp
-	clang++ $(BENCH) $^ -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
-	#clang++ $(BENCH) $^ -O1 -Xclang -disable-llvm-passes -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
+	clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
 
 %-raw.ll: %-unopt.ll
-	opt $^ $(LOAD) -enzyme -o $@ -S
+	opt $^ $(LOAD) $(ENZYME) -o $@ -S
 
 %-opt.ll: %-raw.ll
 	opt $^ -o $@ -S
-	#opt $^ -O2 -o $@ -S
 
 lstm.o: lstm-opt.ll
-	clang++ -O2 $^ -o $@ $(BENCHLINK) -lm
+	clang++ -pthread -O2 $^ -o $@ $(BENCHLINK) -lm
 
 results.json: lstm.o
 	./$^
diff --git a/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make b/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make
index 3dd680e5a1c4..5abb283600e4 100644
--- a/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make
@@ -1,25 +1,25 @@
-# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" make -B ode-raw.ll ode-opt.ll results.txt VERBOSE=1 -f %s
+# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" PTR="%ptr" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" ENZYME="%enzyme" make -B ode-raw.ll ode-opt.ll results.json VERBOSE=1 -f %s
 
 .PHONY: clean
 
+dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
+
 clean:
-	rm -f *.ll *.o results.txt
+	rm -f *.ll *.o results.txt results.json
 
 %-unopt.ll: %.cpp
-	clang++ $(BENCH) $^ -O2 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
-	#clang++ $(BENCH) $^ -O1 -Xclang -disable-llvm-passes -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
+	clang++ $(BENCH) $(PTR) $^ -O2 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
 
 %-raw.ll: %-unopt.ll
-	opt $^ $(LOAD) -enzyme -o $@ -S
+	opt $^ $(LOAD) $(ENZYME) -o $@ -S
 
 %-opt.ll: %-raw.ll
 	opt $^ -o $@ -S
-	#opt $^ -O2 -o $@ -S
 
 ode.o: ode-opt.ll
-	clang++ -O2 $^ -o $@ $(BENCHLINK)
+	clang++ $(BENCH) -O2 $^ -o $@ $(BENCHLINK)
 
-results.txt: ode.o
+results.json: ode.o
 	./$^ 1000 | tee $@
 	./$^ 1000 >> $@
 	./$^ 1000 >> $@
diff --git a/enzyme/benchmarks/ReverseMode/taylorlog/Makefile.make b/enzyme/benchmarks/ReverseMode/taylorlog/Makefile.make
index 1ae170bd94e2..ba39402e4269 100644
--- a/enzyme/benchmarks/ReverseMode/taylorlog/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/taylorlog/Makefile.make
@@ -9,7 +9,8 @@ clean:
 	clang++ $(BENCH) $^ -ffast-math -O2 -fno-unroll-loops -fno-vectorize -o $@ -S -emit-llvm
 
 %-raw.ll: %-unopt.ll
-	opt $^ $(LOAD) -enzyme -mem2reg -early-cse -correlated-propagation -aggressive-instcombine -adce -loop-deletion -o $@ -S
+	opt $^ $(LOAD) -enzyme -mem2reg -early-cse -correlated-propagation -adce -loop-deletion -o $@ -S
+	#opt $^ $(LOAD) -enzyme -mem2reg -early-cse -correlated-propagation -aggressive-instcombine -adce -loop-deletion -o $@ -S
 
 %-opt.ll: %-raw.ll
 	opt $^ -O2 -o $@ -S
diff --git a/enzyme/benchmarks/lit.site.cfg.py.in b/enzyme/benchmarks/lit.site.cfg.py.in
index 93937f9c62d3..adfac5c63608 100644
--- a/enzyme/benchmarks/lit.site.cfg.py.in
+++ b/enzyme/benchmarks/lit.site.cfg.py.in
@@ -44,26 +44,59 @@ config.substitutions.append(('%blink', config.bench_link))
 config.substitutions.append(('%bldpath', config.bench_ldpath))
 
 config.substitutions.append(('%shlibext', config.llvm_shlib_ext))
-config.substitutions.append(('%lli', config.llvm_tools_dir + "/lli" + (" --jit-kind=mcjit" if int(config.llvm_ver) >= 13 else "")
+config.substitutions.append(('%lli', config.llvm_tools_dir + "/lli"
 ))
 config.substitutions.append(('%opt', config.llvm_tools_dir + "/opt"))
 config.substitutions.append(('%llvmver', config.llvm_ver))
 config.substitutions.append(('%FileCheck', config.llvm_tools_dir + "/FileCheck"))
-config.substitutions.append(('%clang', config.llvm_tools_dir + "/clang"))
-config.substitutions.append(('%loadEnzyme', ''
-                                 + (" --enable-new-pm=0" if int(config.llvm_ver) >= 13 else "")
-                                 + ' -load=@ENZYME_BINARY_DIR@/Enzyme/LLVMEnzyme-' + config.llvm_ver + config.llvm_shlib_ext
-                                 + (" --enzyme-attributor=0" if int(config.llvm_ver) >= 13 else "")
-                                 + ' -enzyme-preopt=0'
-                                 ))
+
+emopt = config.enzyme_obj_root + "/Enzyme/MLIR/enzymemlir-opt"
+if len("@ENZYME_BINARY_DIR@") == 0:
+    emopt = os.path.dirname(os.path.abspath(__file__)) + "/../enzymemlir-opt"
+
+eclang = config.llvm_tools_dir + "/clang"
+if len("@ENZYME_BINARY_DIR@") == 0:
+    eclang = os.path.dirname(os.path.abspath(__file__)) + "/../enzyme-clang"
+    resource = config.llvm_tools_dir + "/../clang/staging"
+    eclang += " -resource-dir " + resource + " "
+    eclang += "-I " + os.path.dirname(os.path.abspath(__file__)) + "/Integration"
+
+config.substitutions.append(('%eopt', emopt))
+config.substitutions.append(('%clang', eclang))
+config.substitutions.append(('%O0TBAA', "-O1 -Xclang -disable-llvm-passes"))
+
+newPM = ((" --enable-new-pm=1" if int(config.llvm_ver) == 15 else "")
+         + ' -load-pass-plugin=@ENZYME_BINARY_DIR@/Enzyme/LLVMEnzyme-' + config.llvm_ver + config.llvm_shlib_ext
+         + ' -passes=@ENZYME_BINARY_DIR@/Enzyme/LLVMEnzyme-' + config.llvm_ver + config.llvm_shlib_ext
+         + (" --enzyme-attributor=0" if int(config.llvm_ver) >= 16 else ""))
+if len("@ENZYME_BINARY_DIR@") == 0:
+    newPM = ((" --enable-new-pm=1" if int(config.llvm_ver) == 15 else "")
+             + (" --enzyme-attributor=0" if int(config.llvm_ver) >= 16 else ""))
+
+newPMOP = newPM
+if int(config.llvm_ver) == 16:
+    newPM += " -opaque-pointers=0"
+
+config.substitutions.append(('%loadEnzyme', newPM))
+config.substitutions.append(('%newLoadEnzyme', newPM))
+config.substitutions.append(('%OPnewLoadEnzyme', newPMOP))
+config.substitutions.append(('%enzyme', '-passes="enzyme"'))
+config.substitutions.append(('%ptr', ('-Xclang -opaque-pointers=0' if int(config.llvm_ver) == 16 else '')))
+#config.substitutions.append(('%enzyme', ('-enzyme' if int(config.llvm_ver) < 16 else '-passes="enzyme"')))
+config.substitutions.append(('%simplifycfg', "simplifycfg"))
+config.substitutions.append(('%loopmssa', "loop-mssa"))
+
 config.substitutions.append(('%loadBC', ''
                                  + ' @ENZYME_BINARY_DIR@/BCLoad/BCPass-' + config.llvm_ver + config.llvm_shlib_ext
                                  ))
 config.substitutions.append(('%BClibdir', '@ENZYME_SOURCE_DIR@/bclib/'))
-config.substitutions.append(('%loadClangEnzyme', ''
-                                 + (" -fno-experimental-new-pass-manager" if int(config.llvm_ver) >= 13 else "")
-                                 + ' -Xclang -load -Xclang @ENZYME_BINARY_DIR@/Enzyme/ClangEnzyme-' + config.llvm_ver + config.llvm_shlib_ext
-                                 ))
+
+newPM = (' -fpass-plugin=@ENZYME_BINARY_DIR@/Enzyme/ClangEnzyme-' + config.llvm_ver + config.llvm_shlib_ext
+         + ' -Xclang -load -Xclang @ENZYME_BINARY_DIR@/Enzyme/ClangEnzyme-' + config.llvm_ver + config.llvm_shlib_ext)
+
+config.substitutions.append(('%newLoadClangEnzyme', newPM))
+config.substitutions.append(('%loadClangEnzyme', newPM))
+config.substitutions.append(('%LoadClangEnzyme', newPM))
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@ENZYME_SOURCE_DIR@/benchmarks/lit.cfg.py")