libStatGen Software  1
GenotypeLists.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "GenotypeLists.h"
19 
20 // When the next line is uncommented, the genotype elimination routines
21 // produce a lot of output useful for debugging
22 // #define DEBUG_ELIMINATOR
23 
24 GenotypeList::GenotypeList()
25 {
26  ignore = false;
27 }
28 
29 bool GenotypeList::EliminateGenotypes(Pedigree & ped, Family * family, int marker)
30 {
31  // First, allocate a genotype list for the family
32  GenotypeList * list = new GenotypeList [family->count];
33 
34  // Next, update the possible allele lists for each individual
35  InitializeList(list, ped, family, marker);
36 
37  // Then, do multiple rounds of elimination until a problem is found
38  // or no changes are made
39 
40 #ifdef DEBUG_ELIMINATOR
41  Print(list, ped, family, marker);
42 #endif
43 
44  while (PairwiseCheck(list, ped, family) || FamilyCheck(list, ped, family))
45 #ifdef DEBUG_ELIMINATOR
46  Print(list, ped, family, marker)
47 #endif
48  ;
49 
50  for (int i = 0; i < family->count; i++)
51  if (!list[i].ignore && list[i].allele1.Length() == 0)
52  {
53  printf("%s - Family %s has a subtle genotype inconsistency\n",
54  (const char *) ped.markerNames[marker], (const char *) family->famid);
55 
56  delete [] list;
57  return false;
58  }
59 
60  delete [] list;
61  return true;
62 }
63 
64 void GenotypeList::InitializeList(GenotypeList * list, Pedigree & ped, Family * family, int marker)
65 {
66  for (int i = family->count - 1; i >= 0; i--)
67  {
68  Person & person = ped[family->path[i]];
69  int id = person.traverse;
70  bool maleX = person.sex == SEX_MALE && ped.chromosomeX;
71 
72 #ifdef DEBUG_ELIMINATOR
73  printf("Initializing genotype list for %s ...\n", (const char *) person.pid);
74 #endif
75 
76  // If an individual is genotyped ...
77  if (person.markers[marker].isKnown())
78  {
79  // Their genotype list starts with just one entry!
80  list[id].Dimension(1);
81  list[id].SetGenotype(0, person.markers[marker][0], person.markers[marker][1]);
82  list[id].alleles.Clear();
83  list[id].alleles.Push(person.markers[marker][0]);
84  list[id].alleles.PushIfNew(person.markers[marker][1]);
85  list[id].ignore = false;
86 
87  // "Heterozygous" males have no possible genotypes
88  if (maleX && person.markers[marker].isHeterozygous())
89  list[id].Dimension(0);
90  }
91  else if (list[id].alleles.Length())
92  if (person.sex == SEX_MALE && ped.chromosomeX)
93  {
94  // Males only carry one X chromosome
95  list[id].Dimension(list[id].alleles.Length() + 1);
96 
97  for (int i = 0, out = 0; i < list[id].alleles.Length(); i++)
98  list[id].SetGenotype(out++, list[id].alleles[i], list[id].alleles[i]);
99  list[id].SetGenotype(list[id].alleles.Length(), -1, -1);
100 
101  list[id].ignore = false;
102  }
103  else
104  {
105  // Build the genotype list based on the available allele lists
106  int count = list[id].alleles.Length() * (list[id].alleles.Length() + 3) / 2 + 1;
107 
108  list[id].Dimension(count);
109 
110  for (int i = 0, out = 0; i < list[id].alleles.Length(); i++)
111  {
112  // Allow for all pairs of "transmitted" alleles
113  for (int j = 0; j <= i; j++)
114  list[id].SetGenotype(out++, list[id].alleles[i], list[id].alleles[j]);
115 
116  // Allow for an unstransmitted allele
117  list[id].SetGenotype(out++, list[id].alleles[i], -1);
118  }
119 
120  // Allow for a pair of untransmitted alleles
121  list[id].SetGenotype(count - 1, -1, -1);
122 
123  list[id].ignore = false;
124  }
125  else
126  list[id].ignore = true;
127 
128  // If the individual is a founder this is all there is to it
129  if (i < family->founders) continue;
130 
131  // If the individual is not a founder, update the parental genotype lists...
132  int fatid = person.father->traverse;
133  int motid = person.mother->traverse;
134 
135  for (int i = 0; i < list[id].alleles.Length(); i++)
136  {
137  list[motid].alleles.PushIfNew(list[id].alleles[i]);
138  if (!maleX) list[fatid].alleles.PushIfNew(list[id].alleles[i]);
139  }
140  }
141 }
142 
143 bool GenotypeList::PairwiseCheck(GenotypeList * list, Pedigree & ped, Family * family)
144 {
145 #ifdef DEBUG_ELIMINATOR
146  printf("Checking Relative Pairs ...\n");
147 #endif
148 
149  bool changed = false;
150 
151  for (int i = family->count - 1; i >= family->founders; i--)
152  {
153  Person & person = ped[family->path[i]];
154 
155  int id = person.traverse;
156  int fatid = person.father->traverse;
157  int motid = person.mother->traverse;
158 
159  bool maleX = person.sex == SEX_MALE && ped.chromosomeX;
160 
161  if (list[id].ignore) continue;
162 
163  // Check if genotypes are consistent with paternal genotypes
164  for (int i = 0; i < list[id].allele1.Length(); i++)
165  {
166  int al1 = list[id].allele1[i];
167  int al2 = list[id].allele2[i];
168 
169  // Remove offspring genotypes incompatible with parental genotypes
170  if ((maleX && !list[motid].Matches(al1) && al1 != -1) ||
171  (!maleX && !(al1 == -1 && al2 == -1) &&
172  !(list[fatid].Matches(al1) && (al2 == -1 || list[motid].Matches(al2))) &&
173  !((al2 == -1 || list[fatid].Matches(al2)) && list[motid].Matches(al1))))
174  {
175  list[id].Delete(i--);
176  changed = true;
177  }
178  }
179 
180  // The offspring genotype list allows for a wild-card untransmitted allele
181  // so any single parental genotype is possible
182  if (list[id].Matches(-1))
183  continue;
184 
185  // Check if genotypes are consistent with offspring genotypes
186  for (int i = 0; i < list[motid].allele1.Length(); i++)
187  {
188  int al1 = list[motid].allele1[i];
189  int al2 = list[motid].allele2[i];
190 
191  // Remove genotypes incompatible with offspring genotypes
192  if (!list[id].Matches(al1) &&
193  !list[id].Matches(al2))
194  {
195  list[motid].Delete(i--);
196  changed = true;
197  }
198  }
199 
200  // Males don't affect genotype lists for their fathers
201  if (maleX) continue;
202 
203  // Check if genotypes are consistent with offspring genotypes
204  for (int i = 0; i < list[fatid].allele1.Length(); i++)
205  {
206  int al1 = list[fatid].allele1[i];
207  int al2 = list[fatid].allele2[i];
208 
209  // Remove genotypes incompatible with offspring genotypes
210  if (!list[id].Matches(al1) &&
211  !list[id].Matches(al2))
212  {
213  list[fatid].Delete(i--);
214  changed = true;
215  }
216  }
217 
218 #ifdef DEBUG_ELIMINATOR
219  printf("Done checking individual %s\n", (const char *) person.pid);
220  Print(list, ped, family, 0);
221 #endif
222  }
223 
224  return changed;
225 }
226 
227 
228 bool GenotypeList::FamilyCheck(GenotypeList * list, Pedigree & ped, Family * family)
229 {
230 #ifdef DEBUG_ELIMINATOR
231  printf("Checking Nuclear Families ...\n");
232 #endif
233 
234  bool changed = false;
235 
236  for (int i = family->count - 1; i >= family->founders; i--)
237  {
238  Person & person = ped[family->path[i]];
239 
240  int fatid = person.father->traverse;
241  int motid = person.mother->traverse;
242 
243  // Only go through the loop once per sibship
244  if (person.sibs[0] != &person || list[fatid].ignore || list[motid].ignore)
245  continue;
246 
247 #ifdef DEBUG_ELIMINATOR
248  printf("Checking Sibship with %s ...\n", (const char *) person.pid);
249 #endif
250 
251  // Reset checked genotypes for the mother, father and child
252  list[fatid].checked = 0;
253  list[motid].checked = 0;
254 
255  for (int i = 0; i < person.sibCount; i++)
256  list[person.sibs[i]->traverse].checked = 0;
257 
258  // Go through each of the paternal genotypes
259  changed |= TrimParent(list, person, fatid, motid);
260 
261  // Go through each of maternal genotypes
262  changed |= TrimParent(list, person, motid, fatid);
263 
264  // Sort out the unchecked offspring genotypes ...
265  for (int i = 0; i < person.sibCount; i++)
266  {
267  int sibid = person.sibs[i]->traverse;
268  bool maleX = person.sibs[i]->sex == SEX_MALE && ped.chromosomeX;
269 
270  // For dealing with male X chromosomes, the pairwise check is all we need
271  if (maleX) continue;
272 
273  for (int j = list[sibid].checked; j < list[sibid].allele1.Length(); j++)
274  changed |= Cleanup(list, person, motid, fatid, sibid, j);
275  }
276 
277 #ifdef DEBUG_ELIMINATOR
278 // Print(list, ped, family, 0);
279 #endif
280  }
281 
282  return changed;
283 }
284 
285 bool GenotypeList::Matches(int genotype, int allele)
286 {
287  return allele1[genotype] == allele || allele2[genotype] == allele;
288 }
289 
290 bool GenotypeList::Matches(int allele)
291 {
292  return allele1.Find(allele) != -1 || allele2.Find(allele) != -1;
293 }
294 
295 int GenotypeList::SaveGenotype(int genotype)
296 {
297  if (checked > genotype)
298  return genotype;
299 
300  if (checked != genotype)
301  {
302  allele1.Swap(genotype, checked);
303  allele2.Swap(genotype, checked);
304  }
305 
306  return checked++;
307 }
308 
309 bool GenotypeList::CheckTrio(GenotypeList * list, int fatid, int motid, int child,
310  int i, int j, int k)
311 {
312  // TODO: add tests for this code...
313  return (list[fatid].Matches(i, list[child].allele1[k]) &&
314  (list[motid].Matches(j, list[child].allele2[k]) || list[child].allele2[k] == -1)) ||
315  ((list[fatid].Matches(i, list[child].allele2[k]) || list[child].allele2[k] == -1) &&
316  list[motid].Matches(j, list[child].allele1[k])) ||
317  (list[child].allele1[k] == -1 && list[child].allele2[k] == -1);
318 }
319 
320 void GenotypeList::Dimension(int genotypes)
321 {
322  allele1.Dimension(genotypes);
323  allele2.Dimension(genotypes);
324 }
325 
326 void GenotypeList::SetGenotype(int genotype, int al1, int al2)
327 {
328  allele1[genotype] = al1;
329  allele2[genotype] = al2;
330 }
331 
332 void GenotypeList::Delete(int genotype)
333 {
334  allele1.Delete(genotype);
335  allele2.Delete(genotype);
336 }
337 
338 bool GenotypeList::TrimParent(GenotypeList * list, Person & person, int motid, int fatid)
339 {
340  bool trimmed = false;
341 
342  while (list[motid].checked < list[motid].allele1.Length())
343  {
344  int current = list[motid].allele1.Length() - 1;
345  bool saved = false;
346 
347  // Pair it with each possible paternal genotype
348  for (int i = list[fatid].allele1.Length() - 1; i >= 0; i--)
349  {
350  int matches = 0;
351 
352  // Find out if the pairing is compatible with at least one genotype for each child
353  for (int j = 0; j < person.sibCount; j++)
354  {
355  int sibid = person.sibs[j]->traverse;
356  int maleX = person.sibs[j]->sex == SEX_MALE && person.chromosomeX;
357 
358  // Since we have done the pairwise check, there is nothing more
359  // to do for males ...
360  if (list[sibid].ignore || maleX)
361  {
362  matches++;
363  continue;
364  }
365 
366  for (int k = list[sibid].allele1.Length() - 1; k >= 0; k--)
367  if (CheckTrio(list, motid, fatid, sibid, current, i, k))
368  {
369  matches++;
370  break;
371  }
372 
373  if (matches != j + 1)
374  break;
375  }
376 
377  // Save maternal and paternal genotypes, mark all compatible sibling genotypes
378  if (matches == person.sibCount)
379  {
380  for (int j = 0; j < person.sibCount; j++)
381  {
382  int sibid = person.sibs[j]->traverse;
383 
384  for (int k = list[sibid].checked; k < list[sibid].allele1.Length(); k++)
385  if (CheckTrio(list, motid, fatid, sibid, current, i, k))
386  list[sibid].SaveGenotype(k);
387  }
388 
389  list[motid].SaveGenotype(current);
390  list[fatid].SaveGenotype(i);
391 
392  saved = true;
393 
394  break;
395  }
396  }
397 
398  if (!saved)
399  {
400  list[motid].Delete(current);
401  trimmed = true;
402  }
403  }
404 
405  return trimmed;
406 }
407 
408 bool GenotypeList::Cleanup(GenotypeList * list, Person & person, int motid, int fatid, int child, int geno)
409 {
410  for (int current = 0; current < list[motid].allele1.Length(); current++)
411  for (int i = list[fatid].allele1.Length() - 1; i >= 0; i--)
412  if (CheckTrio(list, motid, fatid, child, current, i, geno))
413  {
414  int matches = 0;
415 
416  // Find out if the pairing is compatible with at least one genotype for each child
417  for (int j = 0; j < person.sibCount; j++)
418  {
419  int sibid = person.sibs[j]->traverse;
420  int maleX = person.sibs[j]->sex == SEX_MALE && person.chromosomeX;
421 
422  // After completing the pairwise check, all males are guaranteed
423  // to be compatible with their mothers
424  if (list[sibid].ignore || maleX)
425  {
426  matches++;
427  continue;
428  }
429 
430  for (int k = list[sibid].allele1.Length() - 1; k >= 0; k--)
431  if (CheckTrio(list, motid, fatid, sibid, current, i, k))
432  {
433  matches++;
434  break;
435  }
436 
437  if (matches != j + 1)
438  break;
439  }
440 
441  // Update list of compatible sibling genotypes
442  if (matches == person.sibCount)
443  for (int j = 0; j < person.sibCount; j++)
444  {
445  int sibid = person.sibs[j]->traverse;
446 
447  for (int k = list[sibid].checked; k < list[sibid].allele1.Length(); k++)
448  if (CheckTrio(list, motid, fatid, sibid, current, i, k))
449  list[sibid].SaveGenotype(k);
450 
451  return false;
452  }
453  }
454 
455  list[child].Delete(geno);
456 
457  return true;
458 }
459 
460 void GenotypeList::Print(GenotypeList * list, Pedigree & ped, Family * family, int marker)
461 {
462  MarkerInfo * info = ped.GetMarkerInfo(marker);
463 
464  for (int i = 0; i < family->count; i++)
465  {
466  printf("%s - ", (const char *) ped[family->path[i]].pid);
467 
468  for (int j = 0; j < list[i].allele1.Length(); j++)
469  {
470  if (list[i].allele1[j] == -1)
471  printf("*/");
472  else
473  printf("%s/", (const char *) info->GetAlleleLabel(list[i].allele1[j]));
474 
475  if (list[i].allele2[j] == -1)
476  printf("* ");
477  else
478  printf("%s ", (const char *) info->GetAlleleLabel(list[i].allele2[j]));
479  }
480 
481  printf("\n");
482  }
483  printf("\n");
484 }
485 
Pedigree
Definition: Pedigree.h:32
MarkerInfo
Definition: PedigreeGlobals.h:29
Family
Definition: PedigreeFamily.h:27
Person
Definition: PedigreePerson.h:31
GenotypeList
Definition: GenotypeLists.h:23