#This software is a free software. Thus, it is licensed under GNU General Public License. #Python implementation to Smith-Waterman Algorithm for Homework 1 of Bioinformatics class. #Forrest Bao, Sept. 26 # zeros() was origianlly from NumPy. # This version is implemented by alevchuk 2011-04-10 def zeros(shape): retval = [] for x in range(shape[0]): retval.append([]) for y in range(shape[1]): retval[-1].append(0) return retval match_award = 10 mismatch_penalty = -5 gap_penalty = -5 # both for opening and extanding def match_score(alpha, beta, gap_symbol = "-"): if alpha == beta: return match_award elif alpha == gap_symbol or beta == gap_symbol: return gap_penalty else: return mismatch_penalty def finalize(align1, align2, gap_symbol = "-"): align1 = align1[::-1] #reverse sequence 1 align2 = align2[::-1] #reverse sequence 2 i,j = 0,0 #calcuate identity, score and aligned sequeces symbol = '' for i in range(0,len(align1)): # if two AAs are the same, then output the letter if align1[i] == align2[i]: symbol = symbol + align1[i] # if they are not identical and none of them is gap elif align1[i] != align2[i] and align1[i] != gap_symbol and align2[i] != gap_symbol: symbol += ' ' found = 0 #if one of them is a gap, output a space elif align1[i] == gap_symbol or align2[i] == gap_symbol: symbol += ' ' return align1, align2 def needle(seq1, seq2, gap_symbol = "-"): m, n = len(seq1), len(seq2) # length of two sequences # Generate DP table and traceback path pointer matrix score = zeros((m+1, n+1)) # the DP table # Calculate DP table for i in range(0, m + 1): score[i][0] = gap_penalty * i for j in range(0, n + 1): score[0][j] = gap_penalty * j for i in range(1, m + 1): for j in range(1, n + 1): match = score[i - 1][j - 1] + match_score(seq1[i-1], seq2[j-1]) delete = score[i - 1][j] + gap_penalty insert = score[i][j - 1] + gap_penalty score[i][j] = max(match, delete, insert) # Traceback and compute the alignment align1, align2 = '', '' i,j = m,n # start from the bottom right cell while i > 0 and j > 0: # end toching the top or the left edge score_current = score[i][j] score_diagonal = score[i-1][j-1] score_up = score[i][j-1] score_left = score[i-1][j] if score_current == score_diagonal + match_score(seq1[i-1], seq2[j-1]): align1 += seq1[i-1] align2 += seq2[j-1] i -= 1 j -= 1 elif score_current == score_left + gap_penalty: align1 += seq1[i-1] align2 += gap_symbol i -= 1 elif score_current == score_up + gap_penalty: align1 += gap_symbol align2 += seq2[j-1] j -= 1 # Finish tracing up to the top left cell while i > 0: align1 += seq1[i-1] align2 += gap_symbol i -= 1 while j > 0: align1 += gap_symbol align2 += seq2[j-1] j -= 1 return finalize(align1, align2)