Parallel PopGen Package
inline_go_fish_data_struct.hpp
1 /*
2  * inline_go_fish_data_struct.hpp
3  *
4  * Author: David Lawrie
5  * inline function definitions for GO Fish data structures
6  */
7 
8 #ifndef INLINE_GOFISH_DATA_FUNCTIONS_HPP_
9 #define INLINE_GOFISH_DATA_FUNCTIONS_HPP_
10 
11 namespace GO_Fish{
12 
14 inline mutID::mutID() : origin_generation(0), origin_population(0), origin_threadID(0), reserved(0) {}
16 inline mutID::mutID(int origin_generation, int origin_population, int origin_threadID, int reserved) : origin_generation(origin_generation), origin_population(origin_population), origin_threadID(origin_threadID), reserved(reserved) {}
17 
19 template<typename T>
20 std::string tostring(const T& value)
21 {
22  std::ostringstream oss;
23  oss << value;
24  return oss.str();
25 }
27 
29 inline std::string mutID::toString() { return "("+tostring(origin_generation)+","+tostring(origin_population)+","+tostring(abs(origin_threadID))+","+tostring(reserved)+")"; } //abs(origin_threadID) so the user doesn't get confused by the preservation flag on ID, here too for eventual allele trajectory.toString() or toFile() more likely
30 
31 inline std::string mutID::toString() const { return "("+tostring(origin_generation)+","+tostring(origin_population)+","+tostring(abs(origin_threadID))+","+tostring(reserved)+")"; } //abs(origin_threadID) so the user doesn't get confused by the preservation flag on ID, here too for eventual allele trajectory.toString() or toFile() more likely
32 
33 inline allele_trajectories::sim_constants::sim_constants(): seed1(0xbeeff00d), seed2(0xdecafbad), num_generations(0), num_sites(1000), num_populations(1), init_mse(true), prev_sim_sample(-1), compact_interval(35), device(-1) {}
34 
35 inline allele_trajectories::time_sample::time_sample(): num_mutations(0), sampled_generation(0) { mutations_freq = NULL; extinct = NULL; Nchrom_e = NULL; /*set pointers to NULL*/}
36 inline allele_trajectories::time_sample::~time_sample(){
37  if(mutations_freq){ delete [] mutations_freq; mutations_freq = NULL; }
38  if(extinct){ delete [] extinct; extinct = NULL; }
39  if(Nchrom_e){ delete [] Nchrom_e; Nchrom_e = NULL; }
40 }
41 
42 inline allele_trajectories::allele_trajectories(): num_samples(0), all_mutations(0) { time_samples = NULL; mutations_ID = NULL; }
43 
45  //replace with shared pointers when moving to CUDA 7+ and C++11? or simply replace this and copy assignment with move constructor & move assignment? or leave as is?
47  sim_run_constants = in.sim_run_constants;
48  num_samples = in.num_samples;
49  all_mutations = in.all_mutations;
50 
51  if(all_mutations > 0){
52  mutations_ID = (mutID *)malloc(all_mutations*sizeof(mutID)); //malloc is faster than new as it doesn't call the default constructor
53  std::memcpy(mutations_ID, in.mutations_ID, all_mutations*sizeof(mutID)); //using memcpy to ensure struct members remain in right memory order bit for bit for when transfering to GPU
54  }
55  else{ mutations_ID = NULL; }
56 
57  if(num_samples > 0){
58  time_samples = new time_sample*[num_samples];
59  for(int i = 0; i < num_samples; i++){
60  time_samples[i] = new time_sample;
61  int num_mutations = in.time_samples[i]->num_mutations;
62  time_samples[i]->num_mutations = num_mutations;
63  time_samples[i]->sampled_generation = in.time_samples[i]->sampled_generation;
64  int num_populations = sim_run_constants.num_populations;
65  time_samples[i]->Nchrom_e = new int[num_populations];
66  time_samples[i]->extinct = new bool[num_populations];
67  for(int j = 0; j < num_populations; j++){
68  time_samples[i]->Nchrom_e[j] = in.time_samples[i]->Nchrom_e[j];
69  time_samples[i]->extinct[j] = in.time_samples[i]->extinct[j];
70  }
71  time_samples[i]->mutations_freq = new float[num_populations*num_mutations];
72  std::memcpy(time_samples[i]->mutations_freq,in.time_samples[i]->mutations_freq,num_populations*num_mutations*sizeof(float));
73  }
74  }
75  else{ time_samples = NULL; }
76 }
77 
79  swap(*this, in);
80  return *this;
81 }
82 
84 
85 inline int allele_trajectories::num_sites(){ return sim_run_constants.num_sites; }
86 
87 inline int allele_trajectories::num_populations(){ return sim_run_constants.num_populations; }
88 
89 inline int allele_trajectories::num_time_samples(){ return num_samples; }
90 
91 inline int allele_trajectories::maximal_num_mutations(){ return all_mutations; }
92 
94  if(!time_samples || num_samples == 0){ fprintf(stderr,"num_mutations error: empty allele_trajectories\n"); exit(1); }
95  if(sample_index < 0 || sample_index > num_samples){ fprintf(stderr,"num_mutations error: requested sample index out of bounds: sample %d [0 %d)\n",sample_index,num_samples); exit(1); }
96  return time_samples[sample_index]->num_mutations;
97 }
98 
99 inline int allele_trajectories::final_generation(){ return sampled_generation(num_samples-1); }
100 
101 inline int allele_trajectories::sampled_generation(int sample_index){
102  if(!time_samples || num_samples == 0){ fprintf(stderr,"sampled_generation error: empty allele_trajectories\n"); exit(1); }
103  if(sample_index < 0 || sample_index > num_samples){ fprintf(stderr,"sampled_generation error: requested sample index out of bounds: sample %d [0 %d)\n",sample_index,num_samples); exit(1); }
104  return time_samples[sample_index]->sampled_generation;
105 }
106 
107 inline bool allele_trajectories::extinct(int sample_index, int population_index){
108  if(!time_samples || num_samples == 0){ fprintf(stderr,"extinct error: empty allele_trajectories\n"); exit(1); }
109  int num_populations = sim_run_constants.num_populations;
110  if((sample_index < 0 || sample_index >= num_samples) || (population_index < 0 || population_index >= num_populations)){ fprintf(stderr,"extinct error: index out of bounds: sample %d [0 %d), population %d [0 %d)\n",sample_index,num_samples,population_index,num_populations); exit(1); }
111  return time_samples[sample_index]->extinct[population_index];
112 }
113 
114 inline int allele_trajectories::effective_number_of_chromosomes(int sample_index, int population_index){
115  if(!time_samples || num_samples == 0){ fprintf(stderr,"effective_number_of_chromosomes error: empty allele_trajectories\n"); exit(1); }
116  int num_populations = sim_run_constants.num_populations;
117  if((sample_index < 0 || sample_index >= num_samples) || (population_index < 0 || population_index >= num_populations)){ fprintf(stderr,"effective_number_of_chromosomes error: index out of bounds: sample %d [0 %d), population %d [0 %d)\n",sample_index,num_samples,population_index,num_populations); exit(1); }
118  return time_samples[sample_index]->Nchrom_e[population_index];
119 }
120 
122 inline float allele_trajectories::frequency(int sample_index, int population_index, int mutation_index){
123  int num_populations = sim_run_constants.num_populations;
124  int num_mutations;
125  if((sample_index >= 0 && sample_index < num_samples) && (population_index >= 0 && population_index < num_populations) && (mutation_index >= 0 && mutation_index < time_samples[num_samples-1]->num_mutations)){
126  num_mutations = time_samples[num_samples-1]->num_mutations;
127  int num_mutations_in_sample = time_samples[sample_index]->num_mutations;
128  if(mutation_index >= num_mutations_in_sample){ return 0; }
129  return time_samples[sample_index]->mutations_freq[mutation_index+population_index*num_mutations_in_sample];
130  }
131  else{
132  if(!time_samples || num_samples == 0){ fprintf(stderr,"frequency error: empty allele_trajectories\n"); exit(1); }
133  num_mutations = time_samples[num_samples-1]->num_mutations;
134  fprintf(stderr,"frequency error: index out of bounds: sample %d [0 %d), population %d [0 %d), mutation %d [0 %d)\n",sample_index,num_samples,population_index,num_populations,mutation_index,num_mutations); exit(1);
135  }
136 }
137 
138 inline mutID allele_trajectories::mutation_ID(int mutation_index){
139  if(num_samples > 0 && time_samples){
140  if(mutation_index >= 0 && mutation_index < all_mutations){ return mutID(mutations_ID[mutation_index].origin_generation,mutations_ID[mutation_index].origin_population,abs(mutations_ID[mutation_index].origin_threadID),mutations_ID[mutation_index].reserved); } //absolute value the user doesn't get confused by the preservation flag on ID
141  fprintf(stderr,"mutation_ID error: requested mutation index out of bounds: mutation %d [0 %d)\n",mutation_index,maximal_num_mutations()); exit(1);
142  }else{ fprintf(stderr,"mutation_ID error: empty allele_trajectories\n"); exit(1); }
143 }
144 
145 
152 inline void allele_trajectories::delete_time_sample(int sample_index){
153  if(sample_index >= 0 && sample_index < num_samples){
154  if(num_samples == 1){ reset(); }
155  else{
156  delete time_samples[sample_index];
157  time_sample ** temp = new time_sample * [num_samples-1];
158  for(int i = 0; i < num_samples; i++){
159  if(i < sample_index){ temp[i] = time_samples[i]; }
160  else if (i > sample_index){ temp[i-1] = time_samples[i]; }
161  }
162  delete [] time_samples;
163  time_samples = temp;
164  num_samples -= 1;
165  all_mutations = time_samples[num_samples-1]->num_mutations; //new maximal number of mutations if last time sample has been deleted, moves the apparent length of mutID array, but does not delete extra data
166  }
167  }else{
168  if(!time_samples || num_samples == 0){ fprintf(stderr,"delete_time_sample error: empty allele_trajectories\n"); exit(1); }
169  fprintf(stderr,"delete_time_sample error: requested sample index out of bounds: sample %d [0 %d)\n",sample_index,num_samples); exit(1);
170  }
171 }
172 
175  if(time_samples){
176  for(int i = 0; i < num_samples; i++){ delete time_samples[i]; }
177  delete [] time_samples;
178  }
179  if(mutations_ID){ free(mutations_ID); }
180  time_samples = NULL; num_samples = 0; mutations_ID = NULL; all_mutations = 0;
181  sim_run_constants = sim_constants();
183 }
184 
186 
187 inline void allele_trajectories::initialize_sim_result_vector(int new_length){
188  sim_constants temp = sim_input_constants; //store sim_input_constants as in this context they are still valid
189  reset(); //overwrite old data if any
190  num_samples = new_length;
191  time_samples = new time_sample *[num_samples];
192  for(int i = 0; i < num_samples; i++){ time_samples[i] = new time_sample(); }
193  sim_input_constants = temp;
194  sim_run_constants = sim_input_constants;
195 }
196 
198 inline std::ostream & operator<<(std::ostream & stream, const mutID & id){ stream << id.toString(); return stream; }
199 
208 inline std::ostream & operator<<(std::ostream & stream, allele_trajectories & A){
209  stream << "seed1" << "\t" << A.sim_run_constants.seed1 << std::endl;
210  stream << "seed2" << "\t" << A.sim_run_constants.seed2 << std::endl;
211  stream << "num_generations" << "\t" << A.sim_run_constants.num_generations << std::endl;
212  stream << "num_sites" << "\t" << A.sim_run_constants.num_sites << std::endl;
213  stream << "num_populations" << "\t" << A.sim_run_constants.num_populations << std::endl;
214  stream << "init_mse" << "\t" << A.sim_run_constants.init_mse << std::endl;
215  stream << "prev_sim_sample" << "\t" << A.sim_run_constants.prev_sim_sample << std::endl;
216  stream << "compact_interval" << "\t" << A.sim_run_constants.compact_interval << std::endl;
217  stream << "device" << "\t" << A.sim_run_constants.device << std::endl << std::endl;
218 
219  if(A.num_samples == 0){ stream << "no simulation stored" << std::endl; return stream; }
220 
221  int num_populations = A.sim_run_constants.num_populations;
222 
223  stream << "time sample:";
224  for(int j = 0; j < A.num_samples; j++){
225  stream << "\t" << j;
226  for(int k = 0; k < num_populations-1; k++){ stream << "\t"; }
227  } stream << std::endl;
228 
229  stream << "generation:";
230  for(int j = 0; j < A.num_samples; j++){
231  stream << "\t" << A.sampled_generation(j);
232  for(int k = 0; k < num_populations-1; k++){ stream << "\t"; }
233  } stream << std::endl << "number of mutations reported:";
234 
235  for(int j = 0; j < A.num_samples; j++){
236  stream << "\t" << A.num_mutations_time_sample(j);
237  for(int k = 0; k < num_populations-1; k++){ stream << "\t"; }
238  } stream << std::endl << "population:" ;
239 
240  for(int j = 0; j < A.num_samples; j++){
241  for(int k = 0; k < num_populations; k++){ stream << "\t" << k; }
242  } stream << std::endl << "effective population size (chromosomes):";
243 
244  for(int j = 0; j < A.num_samples; j++){
245  for(int k = 0; k < num_populations; k++){ stream << "\t" << A.effective_number_of_chromosomes(j,k); }
246  } stream << std::endl << "population extinct:";
247 
248  for(int j = 0; j < A.num_samples; j++){
249  for(int k = 0; k < num_populations; k++){ stream << "\t" << A.extinct(j,k); }
250  } stream << std::endl;
251 
252  if(A.all_mutations == 0){ stream << std::endl << "no mutations stored" << std::endl; return stream; }
253 
254  stream << "mutation ID (origin_generation,origin_population,origin_threadID,reserved)";
255  for(int j = 0; j < A.num_samples; j++){ for(int k = 0; k < num_populations; k++){ stream << "\t" << "frequency"; } }
256  stream << std::endl;
257 
258  for(int i = 0; i < A.all_mutations; i++) {
259  stream << A.mutation_ID(i) <<":";
260  for(int j = 0; j < A.num_samples; j++){
261  for(int k = 0; k < num_populations; k++){ stream << "\t" << A.frequency(j,k,i); }
262  }
263  stream << std::endl;
264  }
265 
266  return stream;
267 }
268 
270  //can use for move constructor/assignment when moving to CUDA 7+ and C++11: http://stackoverflow.com/questions/3279543/what-is-the-copy-and-swap-idiom
273  b.sim_input_constants = temp;
274  temp = a.sim_run_constants;
275  a.sim_run_constants = b.sim_run_constants;
276  b.sim_run_constants = temp;
277  std::swap(a.all_mutations,b.all_mutations);
278  std::swap(a.num_samples,b.num_samples);
279  std::swap(a.mutations_ID,b.mutations_ID);
280  std::swap(a.time_samples,b.time_samples);
281 }
282 
283 } /* ----- end namespace GO_Fish ----- */
284 
285 #endif /* INLINE_GOFISH_DATA_FUNCTIONS_HPP_ */
int prev_sim_sample
time sample of previous simulation to use for initializing current simulation
int origin_generation
generation in which mutation appeared in simulation
control and output data structure for GO_Fish simulation
int compact_interval
how often to compact the simulation and remove fixed or lost mutations
int effective_number_of_chromosomes(int sample_index, int population_index)
returns the effective number of chromosomes of population population_index in time sample sample_inde...
allele_trajectories & operator=(allele_trajectories in)
copy assignment
int final_generation()
returns final generation of simulation
int num_populations()
returns the number of populations in the simulation
bool extinct(int sample_index, int population_index)
returns whether or not population population_index is extinct in time sample sample_index ...
specification of simulation constants
int num_populations
number of populations in simulation
void reset()
deletes all memory held by allele_trajectories, resets constants to default
int maximal_num_mutations()
returns number of reported mutations in the final time sample (maximal number of stored mutations in ...
int origin_threadID
threadID that generated mutation
int num_time_samples()
returns number of time samples taken during simulation run
structure specifying the ID for a mutation in a GO_Fish simulation
int num_mutations_time_sample(int sample_index)
number of reported mutations in the time sample sample_index
void swap(allele_trajectories &a, allele_trajectories &b)
swaps data held by allele_trajectories a and b
int origin_population
population in which mutation first arose
void delete_time_sample(int sample_index)
deletes a single time sample, sample_index
float num_sites
number of sites in simulation
std::ostream & operator<<(std::ostream &stream, const mutID &id)
insertion operator: sends mutID id into the ostream stream
bool init_mse
true: initialize simulation in mutation_selection_equilibrium; false: initialize blank simulation or ...
int device
GPU identity to run simulation on, if -1 next available GPU will be assigned.
int num_sites()
returns the number of sites in the simulation
mutID()
default constructor
Namespace for single-locus, forward, Monte-Carlo Wright-Fisher simulation and output data structures...
Definition: go_fish.cuh:326
sim_constants sim_input_constants
constants for initializing the next simulation
int reserved
reserved for later use, currently 0
float frequency(int sample_index, int population_index, int mutation_index)
returns the frequency of the mutation at time sample sample_index, population population_index, mutation mutation_index
mutID mutation_ID(int mutation_index)
returns the mutation ID at mutation_index
int sampled_generation(int sample_index)
return generation of simulation in the time sample sample_index
std::string toString()
returns a string constant of mutID
sim_constants last_run_constants()
returns sim_constants of the simulation currently held by allele_trajectories
int num_generations
number of generations in simulation