A simple benchmarking scenario.
The code below tests the speed of a neutral, constant-population simulation on a GPU:
29 cudaEvent_t start, stop;
32 cudaEventCreate(&start);
33 cudaEventCreate(&stop);
35 for(
int i = 0; i < num_iter; i++){
36 if(i == num_iter/2){ cudaEventRecord(start, 0); }
41 cudaEventRecord(stop, 0);
42 cudaEventSynchronize(stop);
43 cudaEventElapsedTime(&elapsedTime, start, stop);
44 cudaEventDestroy(start);
45 cudaEventDestroy(stop);
56 int main(
int argc,
char **argv){ run_speed_test(); }
functor: models selection coefficient s as a constant across populations and over time ...
int seed2
random number seed 2 of 2
functor: turns sampling and preserving off (for every generation except the final one which is always...
control and output data structure for GO_Fish simulation
int compact_interval
how often to compact the simulation and remove fixed or lost mutations
functor: single, constant population size (N individuals) across populations and over time ...
int num_populations
number of populations in simulation
int maximal_num_mutations()
returns number of reported mutations in the final time sample (maximal number of stored mutations in ...
int seed1
random number seed 1 of 2
functor: models parameter p as a constant across populations and over time
functor: migration flows at rate m from pop i to pop j =/= i and 1-(num_pop-1)*m for i == j ...
GO Fish Simulation API (contains namespaces GO_Fish and Sim_Model)
__host__ void run_sim(allele_trajectories &all_results, const Functor_mutation mu_rate, const Functor_demography demography, const Functor_migration mig_prop, const Functor_selection sel_coeff, const Functor_inbreeding FI, const Functor_dominance dominance, const Functor_preserve preserve_mutations, const Functor_timesample take_sample)
runs a single-locus Wright-Fisher simulation specified by the given simulation functions and sim_cons...
float num_sites
number of sites in simulation
sim_constants sim_input_constants
constants for initializing the next simulation
int num_generations
number of generations in simulation
In the example makefile below, each line is documented by the top part of the makefile:
Tip: The makefile below compiles machine code explicitly for generation 3.0 and 5.2 GPUs and uses just-in-time (JIT) compilation for everything else (the lowest GPU generation that works for 3P is 3.0). Compilation (and program execution) will be faster if you compile for your specific GPU.
E.g., if running a Tesla K20 or Tesla K40, the corresponding GPU generation is 3.5: all the current --generate-code arch=##,code=## flags can be deleted and replaced with --generate-code arch=compute_35,code=sm_35.
1 # Description of Mac/Linux/Unix Makefile for example_speed.
3 #############################
4 # build_path := Where to build program and put executable (note: folder must already exist)
5 # api_path_source := Location of API source folder
6 # api_path_include := Location of API include folder
7 # EXEC_FILE := Name of executable
9 # NVCC := Compiler path, in this case nvcc is in $PATH
10 # CFLAGS := Compiler Flags: optimize most, fast math, add API include folder to include search path, equivalent to --relocatable-device-code=true --compile
11 # CODE := GPU types for which to build explicitly (I have an NVIDIA GTX 780M and 980) https://developer.nvidia.com/cuda-gpus, creates machine code for code=sm_30 (780) and code=sm_52 (980) and virtual architectures for all other generations which can be compiled JIT - code=compute_30 for generations between (3.0,5.0) and code=compute_50 for generations (5.0 and up) http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-steering-gpu-code-generation
13 # object_names := Objects required for executable
14 # objects := Prepends build path to object names
16 # all := Command 'make' or 'make all' builds executable file $(EXEC_FILE) in location $(build_path)/
18 # ##### Object Dependencies Lists #####
19 # If one of the files on the right hand side of the : changes, the corresponding object must be recompiled.
20 # This is a user's makefile - it assumes there will be no changes to the GOFish API files, so does not include
21 # all the non-*.cu (non source file) dependencies. If changes to the API .h or .cuh files are expected see
22 # Object Dependencies Lists in example_dadi or run 'make clean' before each 'make all'.
23 # ##### End Object Dependencies Lists #####
25 # $(objects) := Make target all objects
26 # Compile source code into objects, $< := dependencies from Object Dependencies Lists, $@ := object in $(objects)
28 # $(build_path)/$(EXEC_FILE) := Make target executable EXEC_FILE which depends on all objects
29 # Link objects into executable EXEC_FILE, $@ := $(build_path)/$(EXEC_FILE)
31 # .PHONY := Defines 'all' and 'clean' as not true targets (i.e. don't remake executable if can't find files called 'all' or 'clean')
33 # clean := Action to perform for command 'make clean'
34 # Remove all objects and EXEC_FILE from build_path
35 #############################
37 build_path = ../example_speed
38 api_path_source = ../../3P/_internal
39 api_path_include = ../../3P
43 CFLAGS = -O3 --use_fast_math -I $(api_path_include)/ -dc
44 CODE = --generate-code arch=compute_30,code=sm_30 --generate-code arch=compute_52,code=sm_52 --generate-code arch=compute_30,code=compute_30 --generate-code arch=compute_50,code=compute_50
46 object_names = run.o shared.o go_fish_impl.o
47 objects = $(addprefix $(build_path)/,$(object_names))
49 all:$(build_path)/$(EXEC_FILE)
51 ##### OBJECT DEPENDENCIES #####
52 $(build_path)/run.o: run.cu
53 $(build_path)/shared.o: $(api_path_source)/shared.cu
54 $(build_path)/go_fish_impl.o: $(api_path_source)/go_fish_impl.cu
55 ##### END OBJECT DEPENDENCIES #####
58 $(NVCC) $(CODE) $(CFLAGS) $< -o $@
60 $(build_path)/$(EXEC_FILE): $(objects)
62 $(NVCC) $(CODE) $(objects) -o $@
67 rm -f $(objects) $(build_path)/$(EXEC_FILE)