RNA Folding simulation
rna_folding.hh
Go to the documentation of this file.
1 
10 #include <algorithm>
11 #include <iostream>
12 #include <vector>
13 #include <fstream>
14 #include "herrlog.hh"
15 
22 std::vector<std::vector<int>> create_matrix(
23  const std::string& rna_sequence, const int& minimal_loop_length = 0) {
24  std::vector<std::vector<int>> dp(rna_sequence.size(),
25  std::vector<int>(rna_sequence.size(), 0));
26 
27  for (size_t k = 1; k < rna_sequence.size(); k++) {
28  for (size_t i = 0; i < rna_sequence.size() - k; i++) {
29  size_t j = i + k;
30 
31  if (j - i >= minimal_loop_length) {
32  int rc = INT32_MIN;
33  for (size_t t = i; t < j; t++) {
34  rc = std::max(rc, dp[i][t] + dp[t + 1][j]);
35  }
36 
37  dp[i][j] = std::max(
38  {dp[i + 1][j], dp[i][j - 1],
39  dp[i + 1][j - 1] +
40  (rna_sequence[i] == 'A' && rna_sequence[j] == 'U' ||
41  rna_sequence[i] == 'U' && rna_sequence[j] == 'A' ||
42  rna_sequence[i] == 'C' && rna_sequence[j] == 'G' ||
43  rna_sequence[i] == 'G' && rna_sequence[j] == 'C'),
44  rc});
45  } else {
46  dp[i][j] = 0;
47  }
48  }
49  }
50 
51  return dp;
52 }
53 
61 int rna_score(const std::string& rna_sequence,
62  const int& minimal_loop_length = 0) {
63  std::vector<std::vector<int>> dp =
64  create_matrix(rna_sequence, minimal_loop_length);
65  return dp[0][rna_sequence.size() - 1];
66 }
67 
77 void traceback(const std::vector<std::vector<int>>& nm, const std::string& rna,
78  std::vector<std::pair<int, int>>& fold, int i, int j) {
79  if (i < j) {
80  if (nm[i][j] == nm[i + 1][j]) { // 1st rule
81  traceback(nm, rna, fold, i + 1, j);
82  } else if (nm[i][j] == nm[i][j - 1]) { // 2nd rule
83  traceback(nm, rna, fold, i, j - 1);
84  } else if (nm[i][j] ==
85  nm[i + 1][j - 1] +
86  (rna[i] == 'A' && rna[j] == 'U' ||
87  rna[i] == 'U' && rna[j] == 'A' ||
88  rna[i] == 'C' && rna[j] == 'G' ||
89  rna[i] == 'G' && rna[j] == 'C')) { // 3rd rule
90  fold.push_back(std::make_pair(i, j));
91  traceback(nm, rna, fold, i + 1, j - 1);
92  } else {
93  for (int k = i + 1; k < j - 1; k++) {
94  if (nm[i][j] == nm[i][k] + nm[k + 1][j]) { // 4th rule
95  traceback(nm, rna, fold, i, k);
96  traceback(nm, rna, fold, k + 1, j);
97  break;
98  }
99  }
100  }
101  }
102 }
103 
111 std::string dot_write(const std::string& rna,
112  const std::vector<std::pair<int, int>>& fold) {
113  std::string dot(rna.size(), '.');
114 
115  for (const auto& s : fold) {
116  dot[std::min(s.first, s.second)] = '(';
117  dot[std::max(s.first, s.second)] = ')';
118  }
119 
120  return dot;
121 }
122 
129 void dot_bracket_to_dot(const std::string& sequence, const std::string& structure) {
130  std::string dot = "graph RNA {\n";
131  dot += " bgcolor=\"transparent\";\n";
132  dot += " splines=polyline;\n";
133  dot += " overlap=scale;\n";
134  dot += " size=\"50,50\";\n";
135  for (size_t i = 0; i < sequence.size(); ++i) {
136  std::string color;
137  switch (sequence[i]) {
138  case 'A': color = "red"; break;
139  case 'C': color = "blue"; break;
140  case 'G': color = "green"; break;
141  case 'U': color = "yellow"; break;
142  }
143  dot += " " + std::to_string(i) + " [label=\"" + sequence[i] + "\", fontcolor=\"black\", fillcolor=\"" + color + "\", style=filled];\n";
144  if (i > 0) { // Add an edge between consecutive bases
145  dot += " " + std::to_string(i - 1) + " -- " + std::to_string(i) + " [color=white, penwidth=10.0];\n";
146  }
147  }
148  std::vector<int> stack;
149  for (size_t i = 0; i < structure.size(); ++i) {
150  if (structure[i] == '(') {
151  stack.push_back(i);
152  } else if (structure[i] == ')') {
153  int j = stack.back();
154  stack.pop_back();
155  dot += " " + std::to_string(j) + " -- " + std::to_string(i) + " [color=lightgrey, penwidth=10.0];\n";
156  }
157  }
158  dot += "}\n";
159 
160  // Write the DOT script to a file
161  std::ofstream file("rna.dot");
162  file << dot;
163  file.close();
164 
165  // Run the neato program to generate a plot of the graph
166  int result = system("neato -Tpng rna.dot -o rna.png");
167  if (result != 0) {
168  Logger::error("Failed to run the neato program.");
169  }
170 }
static void error(const char *format, Args... args)
Logs messages of type error. Furthermore closes the output file and exits the program.
Definition: herrlog.hh:326
Header file logging library.
std::vector< std::vector< int > > create_matrix(const std::string &rna_sequence, const int &minimal_loop_length=0)
Function to create the DP matrix for RNA folding.
Definition: rna_folding.hh:22
std::string dot_write(const std::string &rna, const std::vector< std::pair< int, int >> &fold)
Function to create the dot-bracket notation from the bonds.
Definition: rna_folding.hh:111
void traceback(const std::vector< std::vector< int >> &nm, const std::string &rna, std::vector< std::pair< int, int >> &fold, int i, int j)
Function to traceback DP and get the bonds structure.
Definition: rna_folding.hh:77
void dot_bracket_to_dot(const std::string &sequence, const std::string &structure)
Creates a DOT script from the RNA sequence and structure and calls graphviz.
Definition: rna_folding.hh:129
int rna_score(const std::string &rna_sequence, const int &minimal_loop_length=0)
Function to calculate number of bonds (theoretical) in the RNA.
Definition: rna_folding.hh:61