File size: 1,473 Bytes
c2a02c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c451111
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import requests as r
from io import StringIO
from Bio import SeqIO
import xml.etree.ElementTree as ET

def get_uniprot_seq(protein_id):
    """Fetch the FASTA entry for a UniProt ID and return its sequence.

    Args:
        protein_id: UniProt accession; ``<protein_id>.fasta`` is appended to
            the UniProt base URL.

    Returns:
        The sequence of the first FASTA record as a string, or ``''`` when
        the server answers with an error status or the body contains no
        FASTA record.

    Raises:
        requests.exceptions.RequestException: network-level failures are not
            handled here and propagate to the caller.
    """
    print('Fetching UniProt Sequences for ID: ', protein_id)
    base_url = "http://www.uniprot.org/uniprot/"
    current_url = base_url + protein_id + ".fasta"
    # Retrieval is a read-only operation, so issue a GET rather than a POST.
    response = r.get(current_url)
    if not response.ok:
        # An error page is not FASTA; report "no sequence" instead of
        # feeding it to the parser.
        return ''
    # Only the first record is used, so stop parsing as soon as it is read.
    first_record = next(SeqIO.parse(StringIO(response.text), 'fasta'), None)
    if first_record is None:
        return ''
    return str(first_record.seq)


def get_isoforms(protein_id):
    """Fetch the isoform sequences of a UniProt entry from the EBI Proteins API.

    Args:
        protein_id: UniProt accession inserted into the ``/isoforms`` URL.

    Returns:
        A dict mapping isoform accession -> sequence. The dict is empty when
        the request fails, the server answers with an error status, or the
        response body is not parseable XML. Entries missing a sequence or an
        accession are skipped individually; the remaining isoforms are kept.
    """
    print('Fetching UniProt Isoforms for ID: ', protein_id)
    # key: accession number, value: sequence
    isoforms = {}
    try:
        # make a call to EBI API
        req = r.get('https://www.ebi.ac.uk/proteins/api/proteins/{}/isoforms'.format(protein_id))
        if not req.ok:
            return isoforms
        # parse the returned XML
        uniprot = ET.fromstring(req.text)
    except (r.exceptions.RequestException, ET.ParseError):
        # Best-effort lookup: an unreachable service or a non-XML body means
        # "no isoforms", while unrelated errors are no longer swallowed.
        return isoforms

    for isoform in uniprot:
        seq = isoform.find('{http://uniprot.org/uniprot}sequence')
        iso_accession = isoform.find('{http://uniprot.org/uniprot}accession')

        # Compare with None explicitly: an Element without children is falsy.
        if seq is None or iso_accession is None:
            # Skip only this incomplete entry instead of discarding everything
            # collected so far.
            continue

        if seq.text and iso_accession.text:
            isoforms[iso_accession.text] = seq.text
    return isoforms