Skip to content

Commit 879dc35

Browse files
authored
Update yaml (#462)
* update yaml with ubench args * args for atomics * fix some args * reduce mem number of blocks * update workflow * update workflow * update hw stats * fix path, and dropping Turing
1 parent 3ff1be8 commit 879dc35

File tree

2 files changed

+42
-34
lines changed

2 files changed

+42
-34
lines changed

.github/workflows/long-tests.yml

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# This is a basic workflow to help you get started with Actions
22

3+
# Turing commands are commented out; we run regressions on QV100, RTX3070, and A100
4+
35
name: Long Tests
46

57
# Controls when the workflow will run
@@ -40,10 +42,17 @@ jobs:
4042
run: |
4143
source ./env-setup/12.4_env_setup.sh
4244
source ./gpu-simulator/setup_environment.sh
43-
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-SASS -T ~/../common/accel-sim/traces/volta-tesla-v100/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
44-
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C RTX2060-SASS -T ~/../common/accel-sim/traces/turing-rtx2060/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
45-
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C RTX3070-SASS -T ~/../common/accel-sim/traces/ampere-rtx3070/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
46-
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C A100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
45+
./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/QV100/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
46+
./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/QV100/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
47+
48+
#./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C RTX2060-SASS -T ~/../common/accel-sim/traces/turing-rtx2060/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
49+
50+
./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
51+
./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
52+
53+
./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C A100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
54+
./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C A100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
55+
4756
./util/job_launching/run_simulations.py -B mlperf_inference -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/mlperf_rtx3070/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
4857
4958
./util/job_launching/monitor_func_test.py -v -s --sleep_time 300 stats-per-app-sass.csv -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
@@ -56,25 +65,25 @@ jobs:
5665
# either create a new branch or check it out if it already exists
5766
git -C ./statistics-archive checkout $BRANCH_NAME 2>/dev/null || git -C ./statistics-archive checkout -b $BRANCH_NAME
5867
./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C QV100-SASS -A | tee v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv
59-
./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C RTX2060-SASS -A | tee turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv
68+
#./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C RTX2060-SASS -A | tee turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv
6069
./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C RTX3070-SASS -A | tee ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv
6170
./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C A100-SASS -A | tee ampere-a100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv
6271
6372
mkdir -p statistics-archive/ubench/
6473
# First we merge and archive this run to the main csv that contains all previous runs
6574
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/v100-ubench-sass.csv,v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
6675
| tee v100-ubench-sass.csv && mv v100-ubench-sass.csv ./statistics-archive/ubench/
67-
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
68-
| tee turing-ubench-sass.csv && mv turing-ubench-sass.csv ./statistics-archive/ubench/
76+
# ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
77+
# | tee turing-ubench-sass.csv && mv turing-ubench-sass.csv ./statistics-archive/ubench/
6978
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-ubench-sass.csv,ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
7079
| tee ampere-ubench-sass.csv && mv ampere-ubench-sass.csv ./statistics-archive/ubench/
7180
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-a100-ubench-sass.csv,ampere-a100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
7281
| tee ampere-a100-ubench-sass.csv && mv ampere-a100-ubench-sass.csv ./statistics-archive/ubench/
7382
# Next we merge the latest run with the current run (used for correlation plots) then archive the current run as the new latest for the next time this action occurs
7483
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/v100-ubench-sass-latest.csv,v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
7584
| tee v100-ubench-sass-latest2.csv && mv v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./statistics-archive/ubench/v100-ubench-sass-latest.csv
76-
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass-latest.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
77-
| tee turing-ubench-sass-latest2.csv && mv turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./statistics-archive/ubench/turing-ubench-sass-latest.csv
85+
# ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass-latest.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
86+
# | tee turing-ubench-sass-latest2.csv && mv turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./statistics-archive/ubench/turing-ubench-sass-latest.csv
7887
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-ubench-sass-latest.csv,ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
7988
| tee ampere-ubench-sass-latest2.csv && mv ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./statistics-archive/ubench/ampere-ubench-sass-latest.csv
8089
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-a100-ubench-sass-latest.csv,ampere-a100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
@@ -90,10 +99,10 @@ jobs:
9099
source ./env-setup/12.4_env_setup.sh
91100
./util/hw_stats/get_hw_data.sh > /dev/null 2>&1
92101
rm -rf ./util/plotting/correl-html/
93-
./util/plotting/plot-correlation.py -c ./v100-ubench-sass-latest2.csv -H ./hw_run/volta-tesla-v100/11.2/ | tee v100-ubench-correl.txt
94-
./util/plotting/plot-correlation.py -c ./turing-ubench-sass-latest2.csv -H ./hw_run/TURING-RTX2060/10.2/ | tee turing-ubench-correl.txt
95-
./util/plotting/plot-correlation.py -c ./ampere-ubench-sass-latest2.csv -H ./hw_run/AMPERE-RTX3070/11.2/ | tee ampere-ubench-correl.txt
96-
./util/plotting/plot-correlation.py -c ./ampere-a100-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/device-0/12.8/ | tee ampere-a100-ubench-correl.txt
102+
./util/plotting/plot-correlation.py -c ./v100-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/QV100/hw_run/device-0/12.8/ | tee v100-ubench-correl.txt
103+
#./util/plotting/plot-correlation.py -c ./turing-ubench-sass-latest2.csv -H ./hw_run/TURING-RTX2060/10.2/ | tee turing-ubench-correl.txt
104+
./util/plotting/plot-correlation.py -c ./ampere-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/device-0/12.8/ | tee ampere-ubench-correl.txt
105+
./util/plotting/plot-correlation.py -c ./ampere-a100-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/device-0/12.8/ | tee ampere-a100-ubench-correl.txt
97106
ssh ghci@tgrogers-pc01 mkdir -p /home/ghci/accel-sim/correl/git_${GITHUB_REF}"_"$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT/
98107
rsync --delete -r ./util/plotting/correl-html/ ghci@tgrogers-pc01:/home/ghci/accel-sim/correl/git_${GITHUB_REF}"_"$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT/
99108
if [[ $GITHUB_EVENT_NAME == 'push' ]]; then
@@ -142,7 +151,9 @@ jobs:
142151
srun --time=8:00:00 -c20 make rodinia_2.0-ft GPU_Microbenchmark -j20 -C ./gpu-app-collection/src
143152
./gpu-app-collection/get_regression_data.sh
144153
145-
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX2060-PTX,RTX3070-PTX,A100-PTX -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
154+
#./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX2060-PTX,RTX3070-PTX,A100-PTX -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
155+
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX3070-PTX,A100-PTX -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
156+
146157
./util/job_launching/monitor_func_test.py -v -s stats-per-app-ptx.csv -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
147158
Tracer-Tool:
148159
if: github.repository == 'accel-sim/accel-sim-framework'

util/job_launching/apps/define-all-apps.yml

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -54,43 +54,40 @@ GPU_Microbenchmark:
5454
data_dirs: "$GPUAPPS_ROOT/data_dirs/"
5555
execs:
5656
- l1_bw_32f:
57-
- args:
57+
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
5858
accel-sim-mem: 1G
5959
- l1_bw_64f:
60-
- args:
60+
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
6161
accel-sim-mem: 1G
6262
- l1_bw_128:
63-
- args:
63+
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
6464
accel-sim-mem: 2G
6565
- l1_lat:
66-
- args:
67-
accel-sim-mem: 1G
68-
- l1_lat:
69-
- args:
66+
- args: --blocks 1 --ws 32
7067
accel-sim-mem: 1G
7168
- l2_bw_32f:
72-
- args:
69+
- args: --tpb 1024 --tpsm 1024 --blocks 160 --ws 32
7370
accel-sim-mem: 6G
7471
- l2_bw_64f:
75-
- args:
72+
- args: --tpb 1024 --tpsm 1024 --blocks 160 --l2 786432 --ws 32
7673
accel-sim-mem: 6G
7774
# - l2_bw_128:
7875
# - args:
7976
# accel-sim-mem: 1G
8077
- l2_lat:
81-
- args:
78+
- args: --tpb 1 --tpsm 1 --blocks 1 --l2 786432 --ws 32
8279
accel-sim-mem: 1G
8380
- mem_bw:
84-
- args:
81+
- args: --tpb 1024 --tpsm 1024 --blocks 80 --l2 1572864 --ws 32 --memclk 1132 --membw 64
8582
accel-sim-mem: 2G
8683
- mem_lat:
87-
- args:
84+
- args: --tpb 1024 --tpsm 1024 --blocks 80 --l2 1572864 --ws 32 --memclk 1132 --membw 64
8885
accel-sim-mem: 1G
8986
- shared_bw:
90-
- args:
87+
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
9188
accel-sim-mem: 2G
9289
- shared_lat:
93-
- args:
90+
- args: --blocks 1 --ws 32
9491
accel-sim-mem: 1G
9592
- shared_bank_conflicts:
9693
## argument 1 kernel has conflicts
@@ -100,13 +97,13 @@ GPU_Microbenchmark:
10097
- args: 2
10198
accel-sim-mem: 1G
10299
- MaxFlops:
103-
- args:
100+
- args: --tpb 1024 --blocks 1 --ws 32
104101
accel-sim-mem: 1G
105102
- l1_shared_bw:
106-
- args:
103+
- args: --tpb 1024 --blocks 1 --ws 32
107104
accel-sim-mem: 1G
108105
- l1_bw_32f_unroll:
109-
- args:
106+
- args: --tpb 1024 --blocks 1 --ws 32
110107
accel-sim-mem: 1G
111108
- l1_bw_32f_unroll_large:
112109
- args:
@@ -117,10 +114,10 @@ GPU_Atomic:
117114
data_dirs: "$GPUAPPS_ROOT/data_dirs/"
118115
execs:
119116
- atomic_add_bw:
120-
- args:
117+
- args: --tpb 1 --tpsm 1 --blocks 1 --ws 32
121118
accel-sim-mem: 1G
122119
- atomic_add_bw_conflict:
123-
- args:
120+
- args: --tpb 1024 --tpsm 2048 --blocks 160 --ws 32
124121
accel-sim-mem: 1G
125122
- atomic_add_bw_profile:
126123
- args: 16
@@ -1060,4 +1057,4 @@ huggingface:
10601057
execs:
10611058
- helloworld:
10621059
- args:
1063-
accel-sim-mem: 10G
1060+
accel-sim-mem: 10G

0 commit comments

Comments
 (0)