给定两个字符串作为输入, = , 和 = ,输出字符串的对准的,每个,从而使净惩罚为最小。罚款计算如下:
1. 罚款如果在字符串之间插入间隙,则会发生。
2. 罚款发生错误匹配的字符和 .
例子:
Input : X = CG, Y = CA, p_gap = 3, p_xy = 7
Output : X = CG_, Y = C_A, Total penalty = 6
Input : X = AGGGCT, Y = AGGCA, p_gap = 3, p_xy = 2
Output : X = AGGGCT, Y = A_GGCA, Total penalty = 5
Input : X = CG, Y = CA, p_gap = 3, p_xy = 5
Output : X = CG, Y = CA, Total penalty = 5
关于问题历史的简要说明
序列比对问题是生物科学的基本问题之一,旨在发现两个氨基酸序列的相似性。比较氨基酸对人类至关重要,因为它提供了关于进化和发育的重要信息。 Saul B. Needleman 和 Christian D. Wunsch 针对这个问题设计了一个动态规划算法并于 1970 年发表。从那时起,已经进行了大量改进以提高时间复杂度和空间复杂度,但是这些超出了本文讨论的范围这个帖子。
解决方案我们可以使用动态规划来解决这个问题。可行的解决方案是在字符串引入间隙,以使长度相等。因为可以很容易地证明,在均衡长度后增加额外的间隙只会导致惩罚的增加。
最优子结构
可以从最优解中观察到,例如从给定的样本输入中,最优解缩小到只有三个候选。
1. 和 .
2. 和差距。
3. 差距和 .
最优子结构的证明。
我们可以很容易地用反证法来证明。让是和是 .假设诱导对齐 , 有一些惩罚 , 并且竞争者对齐有惩罚 , 和 .
现在,附加和 ,我们得到了惩罚的对齐 .这与原始对齐的最优性相矛盾 .
因此,证明。
让是最优对齐的惩罚和 .然后,从最优子结构, .
因此,总的最低惩罚是, .
重构解决方案
重建,
1. 追溯填满的表格,开始 .
2.什么时候
…..2a。如果它是使用 case 1 填充的,请转到 .
…..2b。如果它是使用案例 2 填充的,请转到 .
…..2c。如果它是使用 case 3 填充的,请转到 .
3. 如果 i = 0 或 j = 0,则用间隙匹配剩余的子串。
下面是上述解决方案的实现。
C++
// CPP program to implement sequence alignment
// problem.
#include
using namespace std;
// function to find out the minimum penalty
void getMinimumPenalty(string x, string y, int pxy, int pgap)
{
int i, j; // initialising variables
int m = x.length(); // length of gene1
int n = y.length(); // length of gene2
// table for storing optimal substructure answers
int dp[m+1][n+1] = {0};
// initialising the table
for (i = 0; i <= (n+m); i++)
{
dp[i][0] = i * pgap;
dp[0][i] = i * pgap;
}
// calcuting the minimum penalty
for (i = 1; i <= m; i++)
{
for (j = 1; j <= n; j++)
{
if (x[i - 1] == y[j - 1])
{
dp[i][j] = dp[i - 1][j - 1];
}
else
{
dp[i][j] = min({dp[i - 1][j - 1] + pxy ,
dp[i - 1][j] + pgap ,
dp[i][j - 1] + pgap });
}
}
}
// Reconstructing the solution
int l = n + m; // maximum possible length
i = m; j = n;
int xpos = l;
int ypos = l;
// Final answers for the respective strings
int xans[l+1], yans[l+1];
while ( !(i == 0 || j == 0))
{
if (x[i - 1] == y[j - 1])
{
xans[xpos--] = (int)x[i - 1];
yans[ypos--] = (int)y[j - 1];
i--; j--;
}
else if (dp[i - 1][j - 1] + pxy == dp[i][j])
{
xans[xpos--] = (int)x[i - 1];
yans[ypos--] = (int)y[j - 1];
i--; j--;
}
else if (dp[i - 1][j] + pgap == dp[i][j])
{
xans[xpos--] = (int)x[i - 1];
yans[ypos--] = (int)'_';
i--;
}
else if (dp[i][j - 1] + pgap == dp[i][j])
{
xans[xpos--] = (int)'_';
yans[ypos--] = (int)y[j - 1];
j--;
}
}
while (xpos > 0)
{
if (i > 0) xans[xpos--] = (int)x[--i];
else xans[xpos--] = (int)'_';
}
while (ypos > 0)
{
if (j > 0) yans[ypos--] = (int)y[--j];
else yans[ypos--] = (int)'_';
}
// Since we have assumed the answer to be n+m long,
// we need to remove the extra gaps in the starting
// id represents the index from which the arrays
// xans, yans are useful
int id = 1;
for (i = l; i >= 1; i--)
{
if ((char)yans[i] == '_' && (char)xans[i] == '_')
{
id = i + 1;
break;
}
}
// Printing the final answer
cout << "Minimum Penalty in aligning the genes = ";
cout << dp[m][n] << "\n";
cout << "The aligned genes are :\n";
for (i = id; i <= l; i++)
{
cout<<(char)xans[i];
}
cout << "\n";
for (i = id; i <= l; i++)
{
cout << (char)yans[i];
}
return;
}
// Driver code
int main(){
// input strings
string gene1 = "AGGGCT";
string gene2 = "AGGCA";
// initialsing penalties of different types
int misMatchPenalty = 3;
int gapPenalty = 2;
// calling the function to calculate the result
getMinimumPenalty(gene1, gene2,
misMatchPenalty, gapPenalty);
return 0;
}
Java
// Java program to implement
// sequence alignment problem.
import java.io.*;
import java.util.*;
import java.lang.*;
class GFG
{
// function to find out
// the minimum penalty
static void getMinimumPenalty(String x, String y,
int pxy, int pgap)
{
int i, j; // initialising variables
int m = x.length(); // length of gene1
int n = y.length(); // length of gene2
// table for storing optimal
// substructure answers
int dp[][] = new int[n + m + 1][n + m + 1];
for (int[] x1 : dp)
Arrays.fill(x1, 0);
// initialising the table
for (i = 0; i <= (n + m); i++)
{
dp[i][0] = i * pgap;
dp[0][i] = i * pgap;
}
// calcuting the
// minimum penalty
for (i = 1; i <= m; i++)
{
for (j = 1; j <= n; j++)
{
if (x.charAt(i - 1) == y.charAt(j - 1))
{
dp[i][j] = dp[i - 1][j - 1];
}
else
{
dp[i][j] = Math.min(Math.min(dp[i - 1][j - 1] + pxy ,
dp[i - 1][j] + pgap) ,
dp[i][j - 1] + pgap );
}
}
}
// Reconstructing the solution
int l = n + m; // maximum possible length
i = m; j = n;
int xpos = l;
int ypos = l;
// Final answers for
// the respective strings
int xans[] = new int[l + 1];
int yans[] = new int[l + 1];
while ( !(i == 0 || j == 0))
{
if (x.charAt(i - 1) == y.charAt(j - 1))
{
xans[xpos--] = (int)x.charAt(i - 1);
yans[ypos--] = (int)y.charAt(j - 1);
i--; j--;
}
else if (dp[i - 1][j - 1] + pxy == dp[i][j])
{
xans[xpos--] = (int)x.charAt(i - 1);
yans[ypos--] = (int)y.charAt(j - 1);
i--; j--;
}
else if (dp[i - 1][j] + pgap == dp[i][j])
{
xans[xpos--] = (int)x.charAt(i - 1);
yans[ypos--] = (int)'_';
i--;
}
else if (dp[i][j - 1] + pgap == dp[i][j])
{
xans[xpos--] = (int)'_';
yans[ypos--] = (int)y.charAt(j - 1);
j--;
}
}
while (xpos > 0)
{
if (i > 0) xans[xpos--] = (int)x.charAt(--i);
else xans[xpos--] = (int)'_';
}
while (ypos > 0)
{
if (j > 0) yans[ypos--] = (int)y.charAt(--j);
else yans[ypos--] = (int)'_';
}
// Since we have assumed the
// answer to be n+m long,
// we need to remove the extra
// gaps in the starting id
// represents the index from
// which the arrays xans,
// yans are useful
int id = 1;
for (i = l; i >= 1; i--)
{
if ((char)yans[i] == '_' &&
(char)xans[i] == '_')
{
id = i + 1;
break;
}
}
// Printing the final answer
System.out.print("Minimum Penalty in " +
"aligning the genes = ");
System.out.print(dp[m][n] + "\n");
System.out.println("The aligned genes are :");
for (i = id; i <= l; i++)
{
System.out.print((char)xans[i]);
}
System.out.print("\n");
for (i = id; i <= l; i++)
{
System.out.print((char)yans[i]);
}
return;
}
// Driver code
public static void main(String[] args)
{
// input strings
String gene1 = "AGGGCT";
String gene2 = "AGGCA";
// initialising penalties
// of different types
int misMatchPenalty = 3;
int gapPenalty = 2;
// calling the function to
// calculate the result
getMinimumPenalty(gene1, gene2,
misMatchPenalty, gapPenalty);
}
}
C#
// C# program to implement sequence alignment
// problem.
using System;
class GFG
{
// function to find out the minimum penalty
public static void getMinimumPenalty(string x, string y, int pxy, int pgap)
{
int i, j; // initialising variables
int m = x.Length; // length of gene1
int n = y.Length; // length of gene2
// table for storing optimal substructure answers
int[,] dp = new int[n+m+1,n+m+1];
for(int q = 0; q < n+m+1; q++)
for(int w = 0; w < n+m+1; w++)
dp[q,w] = 0;
// initialising the table
for (i = 0; i <= (n+m); i++)
{
dp[i,0] = i * pgap;
dp[0,i] = i * pgap;
}
// calcuting the minimum penalty
for (i = 1; i <= m; i++)
{
for (j = 1; j <= n; j++)
{
if (x[i - 1] == y[j - 1])
{
dp[i,j] = dp[i - 1,j - 1];
}
else
{
dp[i,j] = Math.Min(Math.Min(dp[i - 1,j - 1] + pxy ,
dp[i - 1,j] + pgap) ,
dp[i,j - 1] + pgap );
}
}
}
// Reconstructing the solution
int l = n + m; // maximum possible length
i = m; j = n;
int xpos = l;
int ypos = l;
// Final answers for the respective strings
int[] xans = new int[l+1];
int [] yans = new int[l+1];
while ( !(i == 0 || j == 0))
{
if (x[i - 1] == y[j - 1])
{
xans[xpos--] = (int)x[i - 1];
yans[ypos--] = (int)y[j - 1];
i--; j--;
}
else if (dp[i - 1,j - 1] + pxy == dp[i,j])
{
xans[xpos--] = (int)x[i - 1];
yans[ypos--] = (int)y[j - 1];
i--; j--;
}
else if (dp[i - 1,j] + pgap == dp[i,j])
{
xans[xpos--] = (int)x[i - 1];
yans[ypos--] = (int)'_';
i--;
}
else if (dp[i,j - 1] + pgap == dp[i,j])
{
xans[xpos--] = (int)'_';
yans[ypos--] = (int)y[j - 1];
j--;
}
}
while (xpos > 0)
{
if (i > 0) xans[xpos--] = (int)x[--i];
else xans[xpos--] = (int)'_';
}
while (ypos > 0)
{
if (j > 0) yans[ypos--] = (int)y[--j];
else yans[ypos--] = (int)'_';
}
// Since we have assumed the answer to be n+m long,
// we need to remove the extra gaps in the starting
// id represents the index from which the arrays
// xans, yans are useful
int id = 1;
for (i = l; i >= 1; i--)
{
if ((char)yans[i] == '_' && (char)xans[i] == '_')
{
id = i + 1;
break;
}
}
// Printing the final answer
Console.Write("Minimum Penalty in aligning the genes = " + dp[m,n] + "\n");
Console.Write("The aligned genes are :\n");
for (i = id; i <= l; i++)
{
Console.Write((char)xans[i]);
}
Console.Write("\n");
for (i = id; i <= l; i++)
{
Console.Write((char)yans[i]);
}
return;
}
// Driver code
static void Main()
{
// input strings
string gene1 = "AGGGCT";
string gene2 = "AGGCA";
// initialising penalties of different types
int misMatchPenalty = 3;
int gapPenalty = 2;
// calling the function to calculate the result
getMinimumPenalty(gene1, gene2,
misMatchPenalty, gapPenalty);
}
//This code is contributed by DrRoot_
}
PHP
0)
{
if ($i > 0) $xans[$xpos--] = $x[--$i];
else $xans[$xpos--] = '_';
}
while ($ypos > 0)
{
if ($j > 0)
$yans[$ypos--] = $y[--$j];
else
$yans[$ypos--] = '_';
}
// Since we have assumed the
// answer to be n+m long,
// we need to remove the extra
// gaps in the starting
// id represents the index
// from which the arrays
// xans, yans are useful
$id = 1;
for ($i = $l; $i >= 1; $i--)
{
if ($yans[$i] == '_' &&
$xans[$i] == '_')
{
$id = $i + 1;
break;
}
}
// Printing the final answer
echo "Minimum Penalty in ".
"aligning the genes = ";
echo $dp[$m][$n] . "\n";
echo "The aligned genes are :\n";
for ($i = $id; $i <= $l; $i++)
{
echo $xans[$i];
}
echo "\n";
for ($i = $id; $i <= $l; $i++)
{
echo $yans[$i];
}
return;
}
// Driver code
// input strings
$gene1 = "AGGGCT";
$gene2 = "AGGCA";
// initialising penalties
// of different types
$misMatchPenalty = 3;
$gapPenalty = 2;
// calling the function
// to calculate the result
getMinimumPenalty($gene1, $gene2,
$misMatchPenalty, $gapPenalty);
// This code is contributed by Abhinav96
?>
Minimum Penalty in aligning the genes = 5
The aligned genes are :
AGGGCT
A_GGCA
时间复杂度:
空间复杂度: