Parallel PopGen Package
Example2-DaDi

Compares SFS generation using executable GOFish (GO_Fish and Spectrum) against δaδi.

See subsection δaδi-to-GOFish Parameter Conversion in Getting Started for how to translate parameters from δaδi to GO_Fish in general.

demography.jpg
Demography of population 0 (AF) and population 1 (EU)

A complex demographic scenario was chosen as a test case to compare the GO_Fish simulation against an already established SFS method, δaδi. The demographic model is from the YRI-CEU (AF-EU) δaδi example. Using δaδi parameterization to describe the model, the ancestral population, in mutation-selection equilibrium, undergoes an immediate expansion from Nref to 2Nref individuals. After time T1 (= 0.005) the population splits into two with a constant, equivalent migration, mEU-AF (= 1) between the now split populations. The second (EU) population undergoes a severe bottleneck of 0.05Nref when splitting from the first (AF) population, followed by exponential growth over time T2 (= 0.045) to size 5Nref. The mutations are weakly deleterious and co-dominant (2Nrefs = -2, h =0.5), where 1001 samples were taken of the EU population. The spectrum was then normalized by the number of segregating sites. The corresponding GO Fish parameters for the evolutionary scenario, given a mutation rate of 1x10-9 per site, 2x109 sites, and an initial population size, Nref, of 10,000, are: T1 = 0.005*2Nref = 100 generations, T2 = 900 generations, mEU-AF = 1/(2Nref) = 0.00005, 2Nrefs = -4, h =0.5, and F = 0. As in δaδi, the population size/time can be scaled together and the simulation will generate the same normalized spectra. Below is the GO_Fish simulation and Spectrum SFS code:

1 /*
2  * run.cu
3  *
4  * Author: David Lawrie
5  */
6 
7 #include "go_fish.cuh"
8 #include "spectrum.h"
9 #include <vector>
10 
11 /*
12  * population 0 starts of in mutation-selection equilibrium at size N_ind
13  * population 0 grows to size 2*N_ind at generation 1
14  * at generation 0.01*N_ind, population 1 splits off from population 0
15  * population 1's initial size is 0.05*N_ind
16  * population 1 grows exponentially to size 5*N_ind for 0.09*N_ind generations
17  *
18  * migration between populations is at rate 1/(2*N_ind) starting at generation 0.01*N_ind+1
19  *
20  * selection is weakly deleterious (gamma = -4), mutations are co-dominant (h = 0.5), populations are outbred (F = 0)
21  */
22 
23 void run_validation_test(){
24  typedef Sim_Model::demography_constant dem_const;
29 
30  typedef Sim_Model::migration_constant_equal mig_const;
34  float scale_factor = 1.0f; //entire simulation can be scaled up or down with little to no change in resulting normalized SFS
35 
37  b.sim_input_constants.num_populations = 2; //number of populations
38  b.sim_input_constants.num_generations = scale_factor*pow(10.f,3)+1; //1,000 generations
39 
40  Sim_Model::F_mu_h_constant codominant(0.5f); //dominance (co-dominant)
41  Sim_Model::F_mu_h_constant outbred(0.f); //inbreeding (outbred)
42  Sim_Model::F_mu_h_constant mutation(pow(10.f,-9)/scale_factor); //per-site mutation rate 10^-9
43 
44  int N_ind = scale_factor*pow(10.f,4); //initial number of individuals in population
45  dem_const pop0(N_ind);
46  dem_const pop1(0);
47  dem_pop_const gen0(pop0,pop1,1); //intial population size of N_ind for pop 0 and 0 for pop 1
48  dem_const pop0_final(2*N_ind);
49  dem_pop_const gen1(pop0_final,pop1,1);
50  init_expansion gen_0_1(gen0,gen1,1); //population 0 grows to size 2*N_ind
51  exp_growth pop1_gen100((log(100.f)/(scale_factor*900.f)),0.05*N_ind,scale_factor*100);
52  dem_pop_const_exp gen100(pop0_final,pop1_gen100,1); //population 1 grows exponentially from size 0.05*N_ind to 5*N_ind
53  Sim_Model::demography_piecewise<init_expansion,dem_pop_const_exp> demography_model(gen_0_1,gen100,scale_factor*100);
54 
55  mig_const no_mig_pop0;
56  mig_dir no_pop1_gen0(0.f,1,1,no_mig_pop0);
57  mig_split create_pop1(1.f,0,1,no_pop1_gen0); //individuals from population 0 migrate to form population 1
58  split_pop0 migration_split(no_mig_pop0,create_pop1,scale_factor*100);
59  float mig = 1.f/(2.f*N_ind);
60  mig_const mig_prop(mig,b.sim_input_constants.num_populations); //constant and equal migration between populations
61  Sim_Model::migration_piecewise<split_pop0,mig_const> mig_model(migration_split,mig_prop,scale_factor*100+1);
62 
63  float gamma = -4; //effective selection
64  Sim_Model::selection_constant weak_del(gamma,demography_model,outbred);
65 
66  b.sim_input_constants.compact_interval = 30; //compact interval
67  b.sim_input_constants.num_sites = 100*2*pow(10.f,7); //number of sites
68  int sample_size = 1001; //number of samples in SFS
69 
70  int num_iter = 50; //number of iterations
71  Spectrum::SFS my_spectra;
72 
73  cudaEvent_t start, stop; //CUDA timing functions
74  float elapsedTime;
75  cudaEventCreate(&start);
76  cudaEventCreate(&stop);
77 
78  float avg_num_mutations = 0;
79  float avg_num_mutations_sim = 0;
80  std::vector<std::vector<float> > results(num_iter); //storage for SFS results
81  for(int j = 0; j < num_iter; j++){ results[j].reserve(sample_size); }
82 
83  for(int j = 0; j < num_iter; j++){
84  if(j == num_iter/2){ cudaEventRecord(start, 0); } //use 2nd half of the simulations to time simulation runs + SFS creation
85 
86  b.sim_input_constants.seed1 = 0xbeeff00d + 2*j; //random number seeds
87  b.sim_input_constants.seed2 = 0xdecafbad - 2*j;
88  GO_Fish::run_sim(b, mutation, demography_model, mig_model, weak_del, outbred, codominant, Sim_Model::bool_off(), Sim_Model::bool_off());
89  Spectrum::site_frequency_spectrum(my_spectra,b,0,1,sample_size);
90 
91  avg_num_mutations += ((float)my_spectra.num_mutations)/num_iter;
92  avg_num_mutations_sim += b.maximal_num_mutations()/num_iter;
93  for(int i = 0; i < sample_size; i++){ results[j][i] = my_spectra.frequency_spectrum[i]; }
94  }
95 
96  elapsedTime = 0;
97  cudaEventRecord(stop, 0);
98  cudaEventSynchronize(stop);
99  cudaEventElapsedTime(&elapsedTime, start, stop);
100  cudaEventDestroy(start);
101  cudaEventDestroy(stop);
102 
103  //output SFS simulation results
104  std::cout<<"SFS :"<<std::endl<< "allele count\tavg# mutations\tstandard dev\tcoeff of variation (aka relative standard deviation)"<< std::endl;
105  for(int i = 1; i < sample_size; i++){
106  double avg = 0;
107  double std = 0;
108  float num_mutations;
109  for(int j = 0; j < num_iter; j++){ num_mutations = b.num_sites() - results[j][0]; avg += results[j][i]/(num_iter*num_mutations); }
110  for(int j = 0; j < num_iter; j++){ num_mutations = b.num_sites() - results[j][0]; std += 1.0/(num_iter-1)*pow(results[j][i]/num_mutations-avg,2); }
111  std = sqrt(std);
112  std::cout<<i<<"\t"<<avg<<"\t"<<std<<"\t"<<(std/avg)<<std::endl;
113  }
114 
115  std::cout<<"\nnumber of sites in simulation: "<< b.num_sites() <<"\ncompact interval: "<< b.last_run_constants().compact_interval;
116  std::cout<<"\naverage number of mutations in simulation: "<<avg_num_mutations_sim<<"\naverage number of mutations in SFS: "<<avg_num_mutations<<"\ntime elapsed (ms): "<< 2*elapsedTime/num_iter<<std::endl;
117 }
118 
120 
122 
123 int main(int argc, char **argv) { run_validation_test(); }
functor: models selection coefficient s as a constant across populations and over time ...
Definition: go_fish.cuh:32
float * frequency_spectrum
site frequency spectrum data structure
Definition: spectrum.h:25
functor: migration function changes from m1 to m2 at generation inflection_point
Definition: go_fish.cuh:256
functor: turns sampling and preserving off (for every generation except the final one which is always...
Definition: go_fish.cuh:272
control and output data structure for GO_Fish simulation
int compact_interval
how often to compact the simulation and remove fixed or lost mutations
functor: single, constant population size (N individuals) across populations and over time ...
Definition: go_fish.cuh:154
functor: migration flows at rate m from pop1 to pop2 and function rest for all other migration rates ...
Definition: go_fish.cuh:243
site frequency spectrum data structure (at the moment, functions only generate SFS for a single popul...
Definition: spectrum.h:24
functor: one population, pop, has a different, demography function, d_pop, all others have function...
Definition: go_fish.cuh:203
int num_populations
number of populations in simulation
proto-API for building site frequency spectra (contains the titular namespace Spectrum) ...
float num_mutations
number of segregating mutations in SFS
Definition: spectrum.h:30
int maximal_num_mutations()
returns number of reported mutations in the final time sample (maximal number of stored mutations in ...
void site_frequency_spectrum(SFS &mySFS, const GO_Fish::allele_trajectories &all_results, const int sample_index, const int population_index, const int sample_size, int cuda_device=-1)
create a single-population SFS of size sample_size from a single time point sample_index in a single ...
Definition: spectrum.cu:213
functor: models parameter p as a constant across populations and over time
Definition: go_fish.cuh:101
functor: migration flows at rate m from pop i to pop j =/= i and 1-(num_pop-1)*m for i == j ...
Definition: go_fish.cuh:232
GO Fish Simulation API (contains namespaces GO_Fish and Sim_Model)
functor: models exponential growth of population size (individuals) over time
Definition: go_fish.cuh:177
__host__ void run_sim(allele_trajectories &all_results, const Functor_mutation mu_rate, const Functor_demography demography, const Functor_migration mig_prop, const Functor_selection sel_coeff, const Functor_inbreeding FI, const Functor_dominance dominance, const Functor_preserve preserve_mutations, const Functor_timesample take_sample)
runs a single-locus Wright-Fisher simulation specified by the given simulation functions and sim_cons...
float num_sites
number of sites in simulation
int num_sites()
returns the number of sites in the simulation
sim_constants sim_input_constants
constants for initializing the next simulation
sim_constants last_run_constants()
returns sim_constants of the simulation currently held by allele_trajectories
functor: demography function changes from d1 to d2 at generation inflection_point ...
Definition: go_fish.cuh:216
int num_generations
number of generations in simulation

Below is the corresponding δaδi code:

1 import time
2 
3 import numpy
4 from numpy import array
5 import dadi
6 
7 def prior_onegrow_mig((nu1F, nu2B, nu2F, m, Tp, T), (n1,n2), pts):
8  """
9  Model with growth, split, bottleneck in pop2, exp recovery, migration
10 
11  nu1F: The ancestral population size after growth. (Its initial size is
12  defined to be 1.)
13  nu2B: The bottleneck size for pop2
14  nu2F: The final size for pop2
15  m: The scaled migration rate
16  Tp: The scaled time between ancestral population growth and the split.
17  T: The time between the split and present
18 
19  n1,n2: Size of fs to generate.
20  pts: Number of points to use in grid for evaluation.
21  """
22  # Define the grid we'll use
23  xx = yy = dadi.Numerics.default_grid(pts)
24  gamma_model = -2;
25  # phi for the equilibrium ancestral population
26  phi = dadi.PhiManip.phi_1D(xx, gamma=gamma_model)
27  # Now do the population growth event.
28  phi = dadi.Integration.one_pop(phi, xx, Tp, nu=nu1F, gamma=gamma_model)
29 
30  # The divergence
31  phi = dadi.PhiManip.phi_1D_to_2D(xx, phi)
32  # We need to define a function to describe the non-constant population 2
33  # size. lambda is a convenient way to do so.
34  nu2_func = lambda t: nu2B*(nu2F/nu2B)**(t/T)
35  phi = dadi.Integration.two_pops(phi, xx, T, nu1=nu1F, nu2=nu2_func, m12=m, m21=m, gamma1=gamma_model, gamma2=gamma_model)
36 
37  # Finally, calculate the spectrum.
38  sfs = dadi.Spectrum.from_phi(phi, (n1,n2), (xx,yy))
39  return sfs
40 
41 def runModel(nu1F, nu2B, nu2F, m, Tp, T):
42 
43  ns = (0,1001)
44  print 'sample sizes:', ns
45 
46  # These are the grid point settings will use for extrapolation.
47  pts_l = [110,120,130]
48 
49  func = prior_onegrow_mig
50  params = array([nu1F, nu2B, nu2F, m, Tp, T])
51 
52  # Make the extrapolating version of the demographic model function.
53  func_ex = dadi.Numerics.make_extrap_func(func)
54 
55  # Calculate the model AFS.
56  model = func_ex(params, ns, pts_l)
57  model = model.marginalize((0,))
58  return model
59 
60 
61 time1 = time.time()
62 
63 model = runModel(2, 0.05, 5, 1, 0.005, 0.045)
64 
65 time2 = time.time()
66 print "total runtime (s): " + str(time2-time1)
67 model = model/model.S()
68 
69 for i in range(0,len(model)):
70  print model[i]

Below is the makefile to create executable GOFish from examples/example_dadi/run.cu, each line is documented by the top part of the makefile:

Tip: The makefile below compiles machine code explicitly for generation 3.0 and 5.2 GPUs and uses just in time (JIT) compilation for everything else (lowest GPU generation which works for 3P is 3.0). Compilation (and program execution) will be faster if compiling for your specific GPU.

e.g. if running a Tesla K20 or Tesla K40, then the corresponding GPU generation is 3.5: all the current --generate-code arch=##,code=## flags can be deleted and replaced with --generate-code arch=compute_35,code=sm_35.

1 # Description of Mac/Linux/Unix Makefile for example_dadi.
2 #
3 #############################
4 # build_path := Where to build program and put executable (note: folder must already exist)
5 # api_path_source := Location of API source folder
6 # api_path_include := Location of API include folder
7 # EXEC_FILE := Name of executable
8 #
9 # NVCC := Compiler path, in this case nvcc is in $PATH
10 # CFLAGS := Compiler Flags: optimize most, fast math, add API include folder to include search path, equivalent to --relocatable-device-code=true --compile
11 # CODE := GPU types for which to build explicitly (I have a NVIDIA GTX 780M and 980) https://developer.nvidia.com/cuda-gpus, creates machine code for code=sm_30 (780) and code=sm_52 (980) and virtual architectures for all other generations which can be compiled JIT - code=compute_30 for generations between (3.0,5.0) and code=compute_50 for generations (5.0 and up) http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-steering-gpu-code-generation
12 #
13 # object_names := Objects required for executable
14 # objects := Prepends build path to object names
15 #
16 # all := Command 'make' or 'make all' builds executable file $(EXEC_FILE) in location $(build_path)/
17 #
18 # ##### Object Dependencies Lists #####
19 # If one of the files on the right hand side of the : changes, the corresponding object must be recompiled.
20 # This is a developer's makefile - it assumes there will be changes to the GOFish API files, if not
21 # all the non-*.cu (non source file) dependencies are unnecessary.
22 # ##### End Object Dependencies Lists #####
23 #
24 # $(objects) := Make target all objects
25 # Compile source code into objects, $< := dependencies from Object Dependencies Lists, $@ := object in $(objects)
26 #
27 # $(build_path)/$(EXEC_FILE) := Make target executable EXEC_FILE which depends on all objects
28 # Link objects into executable EXEC_FILE, $@ := $(build_path)/$(EXEC_FILE)
29 #
30 # .PHONY := Defines 'all' and 'clean' as not true targets (i.e. don't remake executable if can't find files called 'all' or 'clean')
31 #
32 # clean := Action to perform for command 'make clean'
33 # Remove all objects and EXEC_FILE from build_path
34 #############################
35 
36 build_path = ../example_dadi
37 api_path_source = ../../3P/_internal
38 api_path_include = ../../3P
39 EXEC_FILE = GOFish
40 
41 NVCC = nvcc
42 CFLAGS = -O3 --use_fast_math -I $(api_path_include)/ -dc
43 CODE = --generate-code arch=compute_30,code=sm_30 --generate-code arch=compute_52,code=sm_52 --generate-code arch=compute_30,code=compute_30 --generate-code arch=compute_52,code=compute_52
44 
45 object_names = run.o spectrum.o shared.o go_fish_impl.o
46 objects = $(addprefix $(build_path)/,$(object_names))
47 
48 all:$(build_path)/$(EXEC_FILE)
49 
50 ##### OBJECT DEPENDENCIES #####
51 $(build_path)/run.o: run.cu $(api_path_source)/shared.cuh $(api_path_source)/go_fish_impl.cuh $(api_path_include)/go_fish_data_struct.h $(api_path_include)/go_fish.cuh $(api_path_include)/spectrum.h
52 $(build_path)/spectrum.o: $(api_path_source)/spectrum.cu $(api_path_include)/spectrum.h $(api_path_source)/shared.cuh $(api_path_include)/go_fish_data_struct.h
53 $(build_path)/shared.o: $(api_path_source)/shared.cu $(api_path_source)/shared.cuh
54 $(build_path)/go_fish_impl.o: $(api_path_source)/go_fish_impl.cu $(api_path_source)/go_fish_impl.cuh $(api_path_source)/shared.cuh
55 ##### END OBJECT DEPENDENCIES #####
56 
57 $(objects):
58  $(NVCC) $(CODE) $(CFLAGS) $< -o $@
59 
60 $(build_path)/$(EXEC_FILE): $(objects)
61 
62  $(NVCC) $(CODE) $(objects) -o $@
63 
64 .PHONY: all clean
65 
66 clean:
67  rm -f $(objects) $(build_path)/$(EXEC_FILE)