Parallel PopGen Package
Example1-Speed

A simple benchmarking scenario.

The code below tests the speed of a neutral, constant-population simulation on a GPU:

1 /*
2  * run.cu
3  *
4  * Author: David Lawrie
5  */
6 
7 #include "go_fish.cuh"
8 
//Prints a warning to std::cerr when a CUDA runtime call does not return cudaSuccess.
//Never aborts: the benchmark stays best-effort, but a failed timing call is no longer silent.
static void warn_on_cuda_error(cudaError_t status, const char * call_name)
{
    if(status != cudaSuccess){ std::cerr<<"warning: "<<call_name<<" failed: "<<cudaGetErrorString(status)<<std::endl; }
}

/*
 * Benchmarks GO_Fish::run_sim on a neutral, constant-size, single-population scenario.
 *
 * The simulation is run num_iter times: the first num_iter/2 runs warm up the GPU and the
 * remaining runs are timed with CUDA events. Afterwards the number of sites, the compact
 * interval, the number of mutations reported by `a`, and the average time per timed run
 * (in ms) are printed to std::cout.
 */
void run_speed_test()
{
    //----- speed test scenario parameters -----
    GO_Fish::allele_trajectories a; //control and output data structure for the simulation
    a.sim_input_constants.num_populations = 1; //1 population
    Sim_Model::F_mu_h_constant mutation(pow(10.f,-9)); //per-site mutation rate 10^-9
    Sim_Model::F_mu_h_constant inbreeding(1.f); //constant inbreeding (fully inbred)
    Sim_Model::demography_constant demography(pow(10.f,5)*(1+inbreeding(0,0))); //200,000 haploid individuals in population, set to maintain consistent effective number of chromosomes invariant w.r.t. inbreeding
    Sim_Model::migration_constant_equal migration; //constant, 0, migration rate
    Sim_Model::selection_constant selection(0); //constant, neutral, selection coefficient
    Sim_Model::F_mu_h_constant dominance(0.f); //constant allele dominance (effectively ignored since F = 1)
    a.sim_input_constants.num_generations = 1000; //1,000 generations in simulation (integer literal: no float-to-int truncation)
    a.sim_input_constants.seed1 = 0xbeeff00d; //random number seeds
    a.sim_input_constants.seed2 = 0xdecafbad;

    a.sim_input_constants.num_sites = 20*2*pow(10.f,7); //number of sites
    a.sim_input_constants.compact_interval = 15; //compact interval (in general: decrease compact interval for larger number of sites)
    //----- end speed test scenario parameters -----

    //----- speed test -----
    const int num_iter = 20; //total number of simulation runs
    const int warmup_runs = num_iter/2; //untimed runs used to warm up the GPU
    const int timed_runs = num_iter - warmup_runs; //runs between the start and stop events
    cudaEvent_t start, stop; //CUDA timing events
    float elapsedTime = 0; //stays 0 if the elapsed time cannot be queried

    warn_on_cuda_error(cudaEventCreate(&start), "cudaEventCreate(start)");
    warn_on_cuda_error(cudaEventCreate(&stop), "cudaEventCreate(stop)");

    for(int i = 0; i < num_iter; i++){
        if(i == warmup_runs){ warn_on_cuda_error(cudaEventRecord(start, 0), "cudaEventRecord(start)"); } //start timing once the warm-up runs are done
        GO_Fish::run_sim(a,mutation,demography,migration,selection,inbreeding,dominance,Sim_Model::bool_off(),Sim_Model::bool_off());
    }

    warn_on_cuda_error(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
    warn_on_cuda_error(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)"); //wait until the stop event has actually been reached
    warn_on_cuda_error(cudaEventElapsedTime(&elapsedTime, start, stop), "cudaEventElapsedTime");
    warn_on_cuda_error(cudaEventDestroy(start), "cudaEventDestroy(start)");
    warn_on_cuda_error(cudaEventDestroy(stop), "cudaEventDestroy(stop)");

    //average over the timed runs only (identical to 2*elapsedTime/num_iter when num_iter is even)
    std::cout<<"number of sites:\t"<< a.sim_input_constants.num_sites<<std::endl
             <<"compact interval:\t"<< a.sim_input_constants.compact_interval<<std::endl
             <<"number of mutations:\t"<<a.maximal_num_mutations()<<std::endl
             <<"time elapsed (ms):\t"<<elapsedTime/timed_runs<<std::endl;
    //----- end speed test -----
}
51 
53 
55 
/* Program entry point: runs the speed benchmark once; command-line arguments are not used. */
int main(int argc, char **argv)
{
    run_speed_test();
    return 0;
}
functor: models selection coefficient s as a constant across populations and over time ...
Definition: go_fish.cuh:32
functor: turns sampling and preserving off (for every generation except the final one which is always...
Definition: go_fish.cuh:272
control and output data structure for GO_Fish simulation
int compact_interval
how often to compact the simulation and remove fixed or lost mutations
functor: single, constant population size (N individuals) across populations and over time ...
Definition: go_fish.cuh:154
int num_populations
number of populations in simulation
int maximal_num_mutations()
returns number of reported mutations in the final time sample (maximal number of stored mutations in ...
functor: models parameter p as a constant across populations and over time
Definition: go_fish.cuh:101
functor: migration flows at rate m from pop i to pop j =/= i and 1-(num_pop-1)*m for i == j ...
Definition: go_fish.cuh:232
GO Fish Simulation API (contains namespaces GO_Fish and Sim_Model)
__host__ void run_sim(allele_trajectories &all_results, const Functor_mutation mu_rate, const Functor_demography demography, const Functor_migration mig_prop, const Functor_selection sel_coeff, const Functor_inbreeding FI, const Functor_dominance dominance, const Functor_preserve preserve_mutations, const Functor_timesample take_sample)
runs a single-locus Wright-Fisher simulation specified by the given simulation functions and sim_cons...
float num_sites
number of sites in simulation
sim_constants sim_input_constants
constants for initializing the next simulation
int num_generations
number of generations in simulation

Each line of the example makefile below is documented in the comment block at the top of the makefile:

Tip: The makefile below compiles machine code explicitly for generation 3.0 and 5.2 GPUs and uses just-in-time (JIT) compilation for everything else (the lowest GPU generation that works with 3P is 3.0). Compilation (and program execution) will be faster if you compile for your specific GPU.

e.g. if running a Tesla K20 or Tesla K40, then the corresponding GPU generation is 3.5: all the current --generate-code arch=##,code=## flags can be deleted and replaced with --generate-code arch=compute_35,code=sm_35.

1 # Description of Mac/Linux/Unix Makefile for example_speed.
2 #
3 #############################
4 # build_path := Where to build program and put executable (note: folder must already exist)
5 # api_path_source := Location of API source folder
6 # api_path_include := Location of API include folder
7 # EXEC_FILE := Name of executable
8 #
9 # NVCC := Compiler path, in this case nvcc is in $PATH
10 # CFLAGS := Compiler Flags: optimize most, fast math, add API include folder to include search path, equivalent to --relocatable-device-code=true --compile
11 # CODE := GPU types for which to build explicitly (I have a NVIDIA GTX 780M and 980) https://developer.nvidia.com/cuda-gpus, creates machine code for code=sm_30 (780) and code=sm_52 (980) and virtual architectures for all other generations which can be compiled JIT - code=compute_30 for generations between (3.0,5.0) and code=compute_50 for generations (5.0 and up) http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-steering-gpu-code-generation
12 #
13 # object_names := Objects required for executable
14 # objects := Prepends build path to object names
15 #
16 # all := Command 'make' or 'make all' builds executable file $(EXEC_FILE) in location $(build_path)/
17 #
18 # ##### Object Dependencies Lists #####
19 # If one of the files on the right hand side of the : changes, the corresponding object must be recompiled.
20 # This is a users makefile - it assumes there will be no changes to the GOFish API files, so does not include
21 # all the non-*.cu (non source file) dependencies. If changes to the API .h or .cuh files are expected see
22 # Object Dependencies Lists in example_dadi or run 'make clean' before each 'make all'.
23 # ##### End Object Dependencies Lists #####
24 #
25 # $(objects) := Make target all objects
26 # Compile source code into objects, $< := dependencies from Object Dependencies Lists, $@ := object in $(objects)
27 #
28 # $(build_path)/$(EXEC_FILE) := Make target executable EXEC_FILE which depends on all objects
29 # Link objects into executable EXEC_FILE, $@ := $(build_path)/$(EXEC_FILE)
30 #
31 # .PHONY := Defines 'all' and 'clean' as not true targets (i.e. don't remake executable if can't find files called 'all' or 'clean')
32 #
33 # clean := Action to perform for command 'make clean'
34 # Remove all objects and EXEC_FILE from build_path
35 #############################
36 
# Build location (folder must already exist), API source/include locations, executable name.
build_path = ../example_speed
api_path_source = ../../3P/_internal
api_path_include = ../../3P
EXEC_FILE = GOFish

# Compiler and flags; -dc compiles each source file as relocatable device code.
NVCC = nvcc
CFLAGS = -O3 --use_fast_math -I $(api_path_include)/ -dc
# NOTE(review): recent CUDA toolkits may no longer accept compute_30/sm_30 - adjust to your GPU and toolkit.
CODE = --generate-code arch=compute_30,code=sm_30 --generate-code arch=compute_52,code=sm_52 --generate-code arch=compute_30,code=compute_30 --generate-code arch=compute_50,code=compute_50

# Objects required for the executable, prefixed with the build path.
object_names = run.o shared.o go_fish_impl.o
objects = $(addprefix $(build_path)/,$(object_names))

all:$(build_path)/$(EXEC_FILE)

##### OBJECT DEPENDENCIES #####
$(build_path)/run.o: run.cu
$(build_path)/shared.o: $(api_path_source)/shared.cu
$(build_path)/go_fish_impl.o: $(api_path_source)/go_fish_impl.cu
##### END OBJECT DEPENDENCIES #####

# Compile: $< is the source file from the dependency lists above, $@ is the object being built.
# Recipe lines must start with a TAB character.
$(objects):
	$(NVCC) $(CODE) $(CFLAGS) $< -o $@

# Link all objects into the executable.
$(build_path)/$(EXEC_FILE): $(objects)
	$(NVCC) $(CODE) $(objects) -o $@

.PHONY: all clean

clean:
	rm -f $(objects) $(build_path)/$(EXEC_FILE)