Parallel PopGen Package
Example3-Compilation

A compilation of different API features, including how to compile a mixed C++/CUDA project.

Function run_migration_equilibrium_simulation, implemented in examples/example_compilation/run.cu, runs a simulation given a migration rate and outputs the result into allele trajectory a. The information from a is then copied to b and a is subsequently deleted. The information from b is then output to a file and then to the terminal.

The main purpose of this is to show that simulation parameters can be passed from a standard C++ program to a CUDA program (*.cu), where the GPU-accelerated simulation is run, and then the simulation results in the form of an allele trajectory can be passed back to the C++ program and manipulated. So accelerated simulations can be inserted into any current C/C++ project. go_fish_data_struct.h is included in run.h (which is subsequently included in main.cpp).

1 /*
2  * main.cpp
3  *
4  * Author: David Lawrie
5  */
6 
7 #include "run.h"
8 #include <fstream>
9 
10 int main(int argc, char **argv){ //runs the simulation, writes the full result to bfile.dat, then prints a summary to the terminal
11  GO_Fish::allele_trajectories a; //control and output data structure for the GO_Fish simulation
12  a.sim_input_constants.seed1 = 0xbeeff00d; //random number seeds
13  a.sim_input_constants.seed2 = 0xdecafbad;
14 
15  float migration_rate = 0.005; //migration proportion (equal between populations)
16  run_migration_equilibrium_simulation(a,migration_rate); //run simulation
17  GO_Fish::allele_trajectories b = a; //copy-initialization (invokes the copy constructor), copies a to b (unneeded, just showing it is possible)
18  a.reset(); //frees memory held by a (unneeded, just showing it is possible)
19 
20  /* --- output simulation information --- */
21  std::ofstream outfile;
22  outfile.open("bfile.dat");
23  outfile<<b; //prints full allele_trajectory b to file
24  outfile.close();
25 
26  std::cout<<std::endl<<"number of time samples: " << b.num_time_samples();
27  std::cout<<std::endl<<"mutations in first time sample: " << b.num_mutations_time_sample(0) <<std::endl<<"mutations in final time sample: " << b.maximal_num_mutations() << std::endl; //mutations in final time sample >= mutations in first time sample as all mutations in the latter were preserved by sampling
28 
29  //prints the first 10 mutations output from burn-in simulation
30  std::cout<<std::endl<<"mutations from burn-in simulation\n";
31  int mutation_range_begin = 0; int mutation_range_end = mutation_range_begin+10;
32  std::cout<<"ID\tstart frequency pop 1\tstart frequency pop 2\tfinal frequency pop 1\tfinal frequency pop 2"<<std::endl;
33  for(int i = mutation_range_begin; i < mutation_range_end; i++){ std::cout<<b.mutation_ID(i)<<"\t"<<b.frequency(0,0,i)<<"\t"<<b.frequency(0,1,i)<<"\t"<<b.frequency(1,0,i)<<"\t"<<b.frequency(1,1,i)<<std::endl; }
34 
35  //prints the first 10 mutations output from scenario simulation
36  std::cout<<std::endl<<"mutations from scenario simulation\n";
37  mutation_range_begin = b.num_mutations_time_sample(0); mutation_range_end = mutation_range_begin+10; //scenario mutations are indexed after those of the first time sample
38  std::cout<<"ID\tstart frequency pop 1\tstart frequency pop 2\tfinal frequency pop 1\tfinal frequency pop 2"<<std::endl;
39  for(int i = mutation_range_begin; i < mutation_range_end; i++){ std::cout<<b.mutation_ID(i)<<"\t"<<b.frequency(0,0,i)<<"\t"<<b.frequency(0,1,i)<<"\t"<<b.frequency(1,0,i)<<"\t"<<b.frequency(1,1,i)<<std::endl; }
40  /* --- end output simulation information --- */
41 }
control and output data structure for GO_Fish simulation
void reset()
deletes all memory held by allele_trajectories, resets constants to default
int num_time_samples()
returns number of time samples taken during simulation run
sim_constants sim_input_constants
constants for initializing the next simulation

The simulation starts off blank, running a burn-in simulation until equilibrium is reached, then running the scenario of interest.

1 /*
2  * run.h
3  *
4  * Author: dlawrie
5  */
6 
7 #include "go_fish_data_struct.h"
8 
9 #ifndef RUN_SIM_H_
10 #define RUN_SIM_H_
11 
12 void run_migration_equilibrium_simulation(GO_Fish::allele_trajectories & a, float migration_rate); //defined in run.cu: runs a burn-in then a scenario simulation with the given migration rate, storing the results in a
13 
14 #endif /* RUN_SIM_H_ */
Subset of go_fish.cuh (the GO_Fish data structures)
control and output data structure for GO_Fish simulation

The below code for example_compilation/run.cu must also include go_fish.cuh in order to run the simulations as including run.h only includes the GO_Fish data structures.

1 /*
2  * run.cu
3  *
4  * Author: David Lawrie
5  */
6 
7 #include "go_fish.cuh"
8 #include "run.h"
9 
10 //scenario: two populations start in mutation-selection-migration equilibrium with weakly deleterious mutations, then simulate what happens after mutations in population 1 become beneficial (1,000 generations)
11 //migration set in main.cpp
12 void run_migration_equilibrium_simulation(GO_Fish::allele_trajectories & a, float migration_rate){ //a: receives the simulation results; migration_rate: migration rate passed to the migration functor below
13  using namespace Sim_Model; //using namespace Sim_Model (avoids Sim_Model::, but compiler can get confused if multiple functions from different namespaces have same name and Sim_Model:: shows ownership)
14  using namespace GO_Fish; //using namespace GO_Fish (avoids GO_Fish::, but compiler can get confused if multiple functions from different namespaces have same name and GO_Fish:: shows ownership)
15 
16  a.sim_input_constants.num_sites = 20*pow(10.f,7); //number of sites
17  a.sim_input_constants.num_populations = 2; //number of populations
18 
19  a.sim_input_constants.init_mse = false; //start from blank simulation
20  F_mu_h_constant mutation(pow(10.f,-9)); //per-site mutation rate
21  F_mu_h_constant inbreeding(1.f); //constant inbreeding
22  demography_constant demography(1000); //number of individuals in both populations
23  migration_constant_equal migration(migration_rate,a.sim_input_constants.num_populations); //constant migration rate
24  selection_constant deleterious(-1.f/1000.f); //constant selection coefficient (weakly deleterious)
25  F_mu_h_constant dominance(0.f); //constant allele dominance (ignored as population is fully inbred)
26  bool_off dont_preserve; //don't preserve mutations
27  bool_off dont_sample; //don't sample generations
28  a.sim_input_constants.compact_interval = 100; //interval between compacts
29 
30  a.sim_input_constants.num_generations = 2*pow(10.f,4); //burn-in simulation to achieve migration equilibrium 20,000 generations
31  run_sim(a,mutation,demography,migration,deleterious,inbreeding,dominance,dont_preserve,dont_sample); //only sample final generation
32 
33  allele_trajectories c(a); //copy constructor, copies a to c (not actually needed for this simulation, just showing it is possible)
34 
35  bool_on sample; //sample generation
36  bool_pulse<bool_off,bool_on> sample_strategy(dont_sample,sample,0,a.sim_input_constants.num_generations); //sample starting generation of second simulation (i.e. last generation of burn-in simulation); NOTE(review): num_generations still holds the burn-in length here (it is reassigned on the next line), so this statement must stay before that reassignment
37  a.sim_input_constants.num_generations = pow(10.f,3); //scenario simulation 1,000 generations
38  a.sim_input_constants.prev_sim_sample = 0; //start from previous simulation time sample 0
39  selection_constant beneficial(20.f/1000.f); //constant selection coefficient (beneficial)
40  selection_population_specific<selection_constant,selection_constant> selection_model(deleterious,beneficial,1); //selection in population 1
41  run_sim(a,mutation,demography,migration,selection_model,inbreeding,dominance,dont_preserve,sample_strategy,a); //scenario simulation, start from migration equilibrium, sample both start and final generations
42 }
functor: models selection coefficient s as a constant across populations and over time ...
Definition: go_fish.cuh:32
int prev_sim_sample
time sample of previous simulation to use for initializing current simulation
functor: turns sampling and preserving off (for every generation except the final one which is always...
Definition: go_fish.cuh:272
control and output data structure for GO_Fish simulation
int compact_interval
how often to compact the simulation and remove fixed or lost mutations
functor: single, constant population size (N individuals) across populations and over time ...
Definition: go_fish.cuh:154
functor: one population, pop, has a different, selection function, s_pop, all other have function s ...
Definition: go_fish.cuh:72
Namespace of functions for controlling GO_Fish simulations.
Definition: go_fish.cuh:26
int num_populations
number of populations in simulation
functor: models parameter p as a constant across populations and over time
Definition: go_fish.cuh:101
functor: migration flows at rate m from pop i to pop j =/= i and 1-(num_pop-1)*m for i == j ...
Definition: go_fish.cuh:232
functor: turns sampling and preserving on (for every generation except the final one which is always ...
Definition: go_fish.cuh:277
GO Fish Simulation API (contains namespaces GO_Fish and Sim_Model)
__host__ void run_sim(allele_trajectories &all_results, const Functor_mutation mu_rate, const Functor_demography demography, const Functor_migration mig_prop, const Functor_selection sel_coeff, const Functor_inbreeding FI, const Functor_dominance dominance, const Functor_preserve preserve_mutations, const Functor_timesample take_sample)
runs a single-locus Wright-Fisher simulation specified by the given simulation functions and sim_cons...
float num_sites
number of sites in simulation
bool init_mse
true: initialize simulation in mutation_selection_equilibrium; false: initialize blank simulation or ...
Namespace for single-locus, forward, Monte-Carlo Wright-Fisher simulation and output data structures...
Definition: go_fish.cuh:326
sim_constants sim_input_constants
constants for initializing the next simulation
functor: returns the result of function f_default except at generation pulse returns the result of fu...
Definition: go_fish.cuh:298
int num_generations
number of generations in simulation

Pro Tip: A nice thing about CUDA is the flexibility it gives in compiling mixed C/C++ and CUDA programs. Below is an example where the entire program can alternatively be compiled by NVCC (the NVIDIA CUDA compiler) - e.g. makefile_nvcc - or by g++ & NVCC, where NVCC compiles just the GPU-accelerated portions and then hands the linking over to g++ - e.g. makefile_gpp. The master makefile controls which is called. This is useful if you are adding the simulation to your own project compiled with a certain compiler that you do not want to change for the code you have written. The CUDA code essentially becomes an accelerated library that you can use in your current project. Master makefile, each line is documented by the top part of the makefile:

1 # Description of Mac/Linux/Unix Makefile for example_compilation.
2 # makefile_nvcc is useful when combining GPU-accelerated code into an existing C++ project and using NVCC to compile all files and link objects into executable.
3 # makefile_gpp is useful when combining GPU-accelerated code into an existing C++ project and user wants to keep non-CUDA code compiled by a different compiler (e.g. g++).
4 #
5 #############################
6 # build_path := Where to build program and put executable (note: folder must already exist)
7 # EXEC_FILE := Name of executable
8 #
9 # cpp_object_names := CPP objects required for executable
10 # cpp_objects := Prepends build path to object names
11 #
12 # cuda_object_names := CUDA objects required for executable
13 # cuda_objects := Prepends build path to object names
14 #
15 # nvcc := Command 'make' or 'make nvcc' calls the makefile makefile_nvcc
16 #
17 # gpp := Command 'make gpp' calls the makefile makefile_gpp
18 #
19 # .PHONY := Defines 'nvcc', 'gpp', and 'clean' as not true targets (i.e. don't look for files called 'nvcc', 'gpp', or 'clean'; always run their commands)
20 #
21 # clean := Action to perform for command 'make clean'
22 # Remove all objects and EXEC_FILE from build_path
23 #############################
24 
25 build_path = ../example_compilation
26 EXEC_FILE = GOFish
27 
28 cpp_object_names = main.o
29 cpp_objects = $(addprefix $(build_path)/,$(cpp_object_names))
30 
31 cuda_object_names = run.o shared.o go_fish_impl.o
32 cuda_objects = $(addprefix $(build_path)/,$(cuda_object_names))
33 
34 nvcc:
35  make -f makefile_nvcc
36 gpp:
37  make -f makefile_gpp
38 
39 .PHONY: nvcc gpp clean
40 
41 clean:
42  rm -f $(cuda_objects) $(cpp_objects) $(build_path)/gpuCode.o $(build_path)/$(EXEC_FILE)

makefile_nvcc, each line is documented by the top part of the makefile:

Tip: The makefile below compiles machine code explicitly for generation 3.0 and 5.2 GPUs and uses just in time (JIT) compilation for everything else (lowest GPU generation which works for 3P is 3.0). Compilation (and program execution) will be faster if compiling for your specific GPU.

e.g. if running a Tesla K20 or Tesla K40, then the corresponding GPU generation is 3.5: all the current --generate-code arch=##,code=## flags can be deleted and replaced with --generate-code arch=compute_35,code=sm_35.

1 # Description of Mac/Linux/Unix Makefile for example_compilation.
2 # NVCC compiles C++ and CUDA code and links all objects into an executable.
3 #
4 #############################
5 # build_path := Where to build program and put executable (note: folder must already exist)
6 # api_path_source := Location of API source folder
7 # api_path_include := Location of API include folder
8 # EXEC_FILE := Name of executable
9 #
10 # NVCC := Compiler path, in this case nvcc is in $PATH
11 # CFLAGS_CUDA := Compiler Flags for CUDA (*.cu) Files: optimize most, fast math, add API include folder to include search path, equivalent to --relocatable-device-code=true --compile
12 # CFLAGS_CPP := Compiler Flags for CPP (main.cpp) Files: optimize most, add API include folder to include search path, compile
13 # CODE := GPU types for which to build explicitly (I have an NVIDIA GTX 780M and 980) https://developer.nvidia.com/cuda-gpus, creates machine code for code=sm_30 (780) and code=sm_52 (980) and virtual architectures for all other generations which can be compiled JIT - code=compute_30 for generations between (3.0,5.2) and code=compute_52 for generations (5.2 and up) http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-steering-gpu-code-generation
14 #
15 # cpp_object_names := CPP objects required for executable
16 # cpp_objects := Prepends build path to object names
17 #
18 # cuda_object_names := CUDA objects required for executable
19 # cuda_objects := Prepends build path to object names
20 #
21 # all := Command 'make' or 'make all' builds executable file $(EXEC_FILE) in location $(build_path)/
22 #
23 # ##### Object Dependencies Lists #####
24 # If one of the files on the right hand side of the : changes, the corresponding object must be recompiled.
25 # This is a users makefile - it assumes there will be no changes to the GOFish API files, so does not include
26 # all the non-*.cu (non source file) dependencies except those that the user created (i.e. run.h). If changes
27 # to the API .h or .cuh files are expected see Object Dependencies Lists in example_dadi or run 'make clean'
28 # before each 'make all'.
29 # ##### End Object Dependencies Lists #####
30 #
31 # $(cpp_objects) := Make target all objects
32 # Compile CPP source code into objects, $< := first dependency (the source file) from Object Dependencies Lists, $@ := object in $(cpp_objects)
33 #
34 # $(cuda_objects) := Make target all objects
35 # Compile CUDA source code into objects, $< := first dependency (the source file) from Object Dependencies Lists, $@ := object in $(cuda_objects)
36 #
37 # $(build_path)/$(EXEC_FILE) := Make target executable EXEC_FILE which depends on all objects
38 # Link CUDA/CPP objects into executable EXEC_FILE, $@ := $(build_path)/$(EXEC_FILE)
39 #
40 # .PHONY := Defines 'all' as not a true target (i.e. don't remake executable if can't find a file called 'all')
41 #############################
42 
43 build_path = ../example_compilation
44 api_path_source = ../../3P/_internal
45 api_path_include = ../../3P
46 EXEC_FILE = GOFish
47 
48 NVCC = nvcc
49 CFLAGS_CUDA = -O3 --use_fast_math -I $(api_path_include)/ -dc
50 CFLAGS_CPP = -O3 -I $(api_path_include)/ -c
51 CODE = --generate-code arch=compute_30,code=sm_30 --generate-code arch=compute_52,code=sm_52 --generate-code arch=compute_30,code=compute_30 --generate-code arch=compute_52,code=compute_52
52 
53 cpp_object_names = main.o
54 cpp_objects = $(addprefix $(build_path)/,$(cpp_object_names))
55 
56 cuda_object_names = run.o shared.o go_fish_impl.o
57 cuda_objects = $(addprefix $(build_path)/,$(cuda_object_names))
58 
59 all: $(build_path)/$(EXEC_FILE)
60 
61 ##### OBJECT DEPENDENCIES #####
62 $(build_path)/main.o: main.cpp run.h
63 $(build_path)/run.o: run.cu run.h
64 $(build_path)/shared.o: $(api_path_source)/shared.cu
65 $(build_path)/go_fish_impl.o: $(api_path_source)/go_fish_impl.cu
66 ##### END OBJECT DEPENDENCIES #####
67 
68 $(cpp_objects):
69  $(NVCC) $(CFLAGS_CPP) $< -o $@
70 
71 $(cuda_objects):
72  $(NVCC) $(CODE) $(CFLAGS_CUDA) $< -o $@
73 
74 $(build_path)/$(EXEC_FILE): $(cpp_objects) $(cuda_objects)
75 
76  $(NVCC) $(CODE) $(cpp_objects) $(cuda_objects) -o $@
77 
78 .PHONY: all

makefile_gpp, each line is documented by the top part of the makefile:

Tip: The makefile below compiles machine code explicitly for generation 3.0 and 5.2 GPUs and uses just in time (JIT) compilation for everything else (lowest GPU generation which works for 3P is 3.0). Compilation (and program execution) will be faster if compiling for your specific GPU.

e.g. if running a Tesla K20 or Tesla K40, then the corresponding GPU generation is 3.5: all the current --generate-code arch=##,code=## flags can be deleted and replaced with --generate-code arch=compute_35,code=sm_35.

1 # Description of Mac/Linux/Unix Makefile for example_compilation using g++ to compile the C++ code and link objects into executable.
2 # NVCC compiles CUDA code and links device (GPU) code into an object that can be understood by g++ with the help of the CUDA Runtime Library.
3 #
4 #############################
5 # build_path := Where to build program and put executable (note: folder must already exist)
6 # api_path_source := Location of API source folder
7 # api_path_include := Location of API include folder
8 # EXEC_FILE := Name of executable
9 #
10 # CC := Compiler, in this case g++
11 # NVCC := Compiler path, in this case nvcc is in $PATH
12 # CFLAGS_CUDA := Compiler Flags for CUDA (*.cu) Files: optimize most, fast math, add API include folder to include search path, equivalent to --relocatable-device-code=true --compile
13 # CFLAGS_CPP := Compiler Flags for CPP (main.cpp) Files: optimize most, add API include folder to include search path, compile
14 # CODE := GPU types for which to build explicitly (I have an NVIDIA GTX 780M and 980) https://developer.nvidia.com/cuda-gpus, creates machine code for code=sm_30 (780) and code=sm_52 (980) and virtual architectures for all other generations which can be compiled JIT - code=compute_30 for generations between (3.0,5.2) and code=compute_52 for generations (5.2 and up) http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-steering-gpu-code-generation
15 # LIB_CUDART := Location of the CUDA Runtime API
16 #
17 # cpp_object_names := CPP objects required for executable
18 # cpp_objects := Prepends build path to object names
19 #
20 # cuda_object_names := CUDA objects required for executable
21 # cuda_objects := Prepends build path to object names
22 #
23 # all := Command 'make' or 'make all' builds executable file $(EXEC_FILE) in location $(build_path)/
24 #
25 # ##### Object Dependencies Lists #####
26 # If one of the files on the right hand side of the : changes, the corresponding object must be recompiled.
27 # This is a users makefile - it assumes there will be no changes to the GOFish API files, so does not include
28 # all the non-*.cu (non source file) dependencies except those that the user created (i.e. run.h). If changes
29 # to the API .h or .cuh files are expected see Object Dependencies Lists in example_dadi or run 'make clean'
30 # before each 'make all'.
31 # ##### End Object Dependencies Lists #####
32 #
33 # $(cpp_objects) := Make target all objects
34 # Use g++ to compile CPP source code into objects, $< := first dependency (the source file) from Object Dependencies Lists, $@ := object in $(cpp_objects)
35 #
36 # $(cuda_objects) := Make target all objects
37 # Use nvcc to compile CUDA source code into objects, $< := first dependency (the source file) from Object Dependencies Lists, $@ := object in $(cuda_objects)
38 #
39 # $(build_path)/gpuCode.o := Make target CUDA object which depends on cuda objects
40 # Use nvcc to link device code from CUDA objects into object gpuCode.o, $@ := $(build_path)/gpuCode.o
41 #
42 # $(build_path)/$(EXEC_FILE) := Make target executable EXEC_FILE which depends on all objects
43 # Use g++ to link CUDA/CPP objects into executable EXEC_FILE reintegrating device code using gpuCode.o and the CUDA runtime library, $@ := $(build_path)/$(EXEC_FILE)
44 #
45 # .PHONY := Defines 'all' as not a true target (i.e. don't remake executable if can't find a file called 'all')
46 #############################
47 
48 build_path = ../example_compilation
49 api_path_source = ../../3P/_internal
50 api_path_include = ../../3P
51 EXEC_FILE = GOFish
52 
53 CC = g++
54 NVCC = nvcc
55 CFLAGS_CUDA = -O3 --use_fast_math -I $(api_path_include)/ -dc
56 CFLAGS_CPP = -O3 -I $(api_path_include)/ -c
57 CODE = --generate-code arch=compute_30,code=sm_30 --generate-code arch=compute_52,code=sm_52 --generate-code arch=compute_30,code=compute_30 --generate-code arch=compute_52,code=compute_52
58 LIB_CUDART = -L/usr/local/cuda/lib -lcudart
59 
60 cpp_object_names = main.o
61 cpp_objects = $(addprefix $(build_path)/,$(cpp_object_names))
62 
63 cuda_object_names = run.o shared.o go_fish_impl.o
64 cuda_objects = $(addprefix $(build_path)/,$(cuda_object_names))
65 
66 all: $(build_path)/$(EXEC_FILE)
67 
68 ##### OBJECT DEPENDENCIES #####
69 $(build_path)/main.o: main.cpp run.h
70 $(build_path)/run.o: run.cu run.h
71 $(build_path)/shared.o: $(api_path_source)/shared.cu
72 $(build_path)/go_fish_impl.o: $(api_path_source)/go_fish_impl.cu
73 ##### END OBJECT DEPENDENCIES #####
74 
75 $(cpp_objects):
76  $(CC) $(CFLAGS_CPP) $< -o $@
77 
78 $(cuda_objects):
79  $(NVCC) $(CODE) $(CFLAGS_CUDA) $< -o $@
80 
81 $(build_path)/gpuCode.o: $(cuda_objects)
82  $(NVCC) $(CODE) -dlink $(cuda_objects) -o $@
83 
84 $(build_path)/$(EXEC_FILE): $(cpp_objects) $(cuda_objects) $(build_path)/gpuCode.o
85 
86  $(CC) $(cpp_objects) $(build_path)/gpuCode.o $(cuda_objects) $(LIB_CUDART) -o $@
87 
88 .PHONY: all