Spaces:

hoang1007
/

spelling-correction

Runtime error

App Files Files Community

spelling-correction / utils /alignment.py

hoang1007

Upload 69 files

44db343 about 1 year ago

raw

history blame

3.38 kB

	#This software is a free software. Thus, it is licensed under GNU General Public License.
	#Python implementation to Smith-Waterman Algorithm for Homework 1 of Bioinformatics class.
	#Forrest Bao, Sept. 26 <http://fsbao.net> <forrest.bao aT gmail.com>

	# zeros() was origianlly from NumPy.
	# This version is implemented by alevchuk 2011-04-10
	def zeros(shape):
	retval = []
	for x in range(shape[0]):
	retval.append([])
	for y in range(shape[1]):
	retval[-1].append(0)
	return retval

	match_award = 10
	mismatch_penalty = -5
	gap_penalty = -5 # both for opening and extanding

	def match_score(alpha, beta, gap_symbol = "-"):
	if alpha == beta:
	return match_award
	elif alpha == gap_symbol or beta == gap_symbol:
	return gap_penalty
	else:
	return mismatch_penalty

	def finalize(align1, align2, gap_symbol = "-"):
	align1 = align1[::-1] #reverse sequence 1
	align2 = align2[::-1] #reverse sequence 2

	i,j = 0,0

	#calcuate identity, score and aligned sequeces
	symbol = ''
	for i in range(0,len(align1)):
	# if two AAs are the same, then output the letter
	if align1[i] == align2[i]:
	symbol = symbol + align1[i]

	# if they are not identical and none of them is gap
	elif align1[i] != align2[i] and align1[i] != gap_symbol and align2[i] != gap_symbol:
	symbol += ' '
	found = 0

	#if one of them is a gap, output a space
	elif align1[i] == gap_symbol or align2[i] == gap_symbol:
	symbol += ' '

	return align1, align2


	def needle(seq1, seq2, gap_symbol = "-"):
	m, n = len(seq1), len(seq2) # length of two sequences

	# Generate DP table and traceback path pointer matrix
	score = zeros((m+1, n+1)) # the DP table

	# Calculate DP table
	for i in range(0, m + 1):
	score[i][0] = gap_penalty * i
	for j in range(0, n + 1):
	score[0][j] = gap_penalty * j
	for i in range(1, m + 1):
	for j in range(1, n + 1):
	match = score[i - 1][j - 1] + match_score(seq1[i-1], seq2[j-1])
	delete = score[i - 1][j] + gap_penalty
	insert = score[i][j - 1] + gap_penalty
	score[i][j] = max(match, delete, insert)

	# Traceback and compute the alignment
	align1, align2 = '', ''
	i,j = m,n # start from the bottom right cell
	while i > 0 and j > 0: # end toching the top or the left edge
	score_current = score[i][j]
	score_diagonal = score[i-1][j-1]
	score_up = score[i][j-1]
	score_left = score[i-1][j]

	if score_current == score_diagonal + match_score(seq1[i-1], seq2[j-1]):
	align1 += seq1[i-1]
	align2 += seq2[j-1]
	i -= 1
	j -= 1
	elif score_current == score_left + gap_penalty:
	align1 += seq1[i-1]
	align2 += gap_symbol
	i -= 1
	elif score_current == score_up + gap_penalty:
	align1 += gap_symbol
	align2 += seq2[j-1]
	j -= 1

	# Finish tracing up to the top left cell
	while i > 0:
	align1 += seq1[i-1]
	align2 += gap_symbol
	i -= 1
	while j > 0:
	align1 += gap_symbol
	align2 += seq2[j-1]
	j -= 1

	return finalize(align1, align2)