tree.c 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. #include <stdio.h>
  2. #include "tree.h"
  3. void sq_insert_char(seqtree* tree, char c, float count) {
  4. VEC_EACHP(&tree->chars, i, ch) {
  5. if(ch->c == c) {
  6. ch->prob += count;
  7. return;
  8. }
  9. }
  10. VEC_PUSH(&tree->chars, ((seqchar){
  11. .c = c,
  12. .prob = count,
  13. .kids = NULL,
  14. }));
  15. }
  16. void sq_insert_seq(seqtree* tree, char* seq, int n, float count) {
  17. if(n <= 1) {
  18. sq_insert_char(tree, *seq, count);
  19. return;
  20. }
  21. // look for the right subtree to decend
  22. VEC_EACHP(&tree->chars, i, ch) {
  23. if(ch->c == *seq) {
  24. if(!ch->kids) {
  25. ch->kids = calloc(1, sizeof(*ch->kids));
  26. }
  27. sq_insert_seq(ch->kids, seq + 1, n - 1, count);
  28. return;
  29. }
  30. }
  31. // create a new subtree
  32. seqtree* k = calloc(1, sizeof(seqtree));
  33. VEC_PUSH(&tree->chars, ((seqchar){
  34. .c = *seq,
  35. .prob = 0,
  36. .kids = k,
  37. }));
  38. sq_insert_seq(k, seq + 1, n - 1, count);
  39. }
  40. void sq_total_and_invert(seqtree* tree) {
  41. float sum = 0;
  42. VEC_EACHP(&tree->chars, i, ch) {
  43. sum += ch->prob;
  44. }
  45. if(sum > 0) {
  46. sum = 1.0 / sum;
  47. VEC_EACHP(&tree->chars, i, ch) {
  48. ch->prob *= sum;
  49. }
  50. }
  51. VEC_EACHP(&tree->chars, i, ch) {
  52. if(ch->kids) sq_total_and_invert(ch->kids);
  53. }
  54. }
  55. seqtree* sq_descend(seqtree* tree, char* seq, int depth) {
  56. if(depth == 0) return tree;
  57. VEC_EACHP(&tree->chars, i, ch) {
  58. if(ch->c == *seq) {
  59. if(!ch->kids) return NULL;
  60. return sq_descend(ch->kids, seq + 1, depth - 1);
  61. }
  62. }
  63. return NULL;
  64. }
  65. char sq_sample(seqtree* tree) {
  66. float pval = frandNorm();
  67. float sum = 0;
  68. VEC_EACHP(&tree->chars, i, ch) {
  69. sum += ch->prob;
  70. if(sum >= pval) {
  71. return ch->c;
  72. }
  73. }
  74. return -1;
  75. }
  76. char sq_sample_depth(seqtree* tree, char* seq, int depth) {
  77. seqtree* t = sq_descend(tree, seq, depth);
  78. if(!t) return -1;
  79. return sq_sample(t);
  80. }
  81. float sq_prob_depth(seqtree* tree, char* seq, int depth) {
  82. if(!tree) return -1;
  83. VEC_EACHP(&tree->chars, i, ch) {
  84. if(ch->c == *seq) {
  85. if(depth <= 1) {
  86. return ch->prob;
  87. }
  88. return sq_prob_depth(ch->kids, seq + 1, depth - 1);
  89. }
  90. }
  91. return -1;
  92. }
  93. long sq_memstats(seqtree* tree) {
  94. long sum = sizeof(*tree) + VEC_ALLOC(&tree->chars) * sizeof(VEC_ITEM(&tree->chars, 0));
  95. VEC_EACHP(&tree->chars, i, ch) {
  96. if(ch->kids) sum += sq_memstats(ch->kids);
  97. }
  98. return sum;
  99. }
  100. void sq_print_tree(seqtree* tree, int depth) {
  101. VEC_EACHP(&tree->chars, i, ch) {
  102. for(int i = 0; i < depth; i++) printf(" ");
  103. printf("%c - %f\n", ch->c, ch->prob);
  104. if(ch->kids) sq_print_tree(ch->kids, depth + 2);
  105. }
  106. }