File size: 2,440 Bytes
3505899
 
 
 
 
 
 
 
 
 
 
 
90fdd1e
3505899
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from AssistantService import GPTAssistant
from openai.error import AuthenticationError
import streamlit as st
import configparser

# Load optional local settings. ConfigParser.read() silently skips files it
# cannot find, so the app also works without a config.ini.
config = configparser.ConfigParser()
config.read('config.ini')
# A ConfigParser always has a DEFAULT section, so look the option up
# unconditionally; the fallback keeps assistant_api_key bound to '' when the
# option is absent.
assistant_api_key = config.get('DEFAULT', 'API-KEY', fallback='')

# Page header and short description of the app.
st.title("Web Scraping Assistant")
st.write("This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you.")
st.write("Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)")

# Ask for the key interactively only when config.ini did not provide one.
if not assistant_api_key:
    # type="password" masks the secret so it is not shown in clear text on
    # screen while the user types or pastes it.
    assistant_api_key = st.text_input("Paste your API key here:", type="password")

# gpt_assistant is intentionally left unbound until a key is available; the
# button handlers further down detect the missing assistant and ask the user
# to complete the key field.
if assistant_api_key:
    gpt_assistant = GPTAssistant(assistant_api_key)

# HTML fragment used as the sample for both the format and code generation steps.
html_content = st.text_input("Paste your piece of HTML here:")

# Step 1: ask the assistant for the output format of the pasted HTML fragment
# and keep it in the session so it survives Streamlit reruns.
if html_content and st.button("Extract data format"):
    # The assistant is only bound once an API key has been supplied. Check for
    # it explicitly instead of catching NameError around the call, which would
    # also hide unrelated NameErrors raised inside the assistant itself.
    assistant = globals().get('gpt_assistant')
    if assistant is None:
        st.write("Complete the API key field")
    else:
        try:
            output = assistant.chain_response_format(html_content)
        except AuthenticationError:
            st.write("Invalid API key")
        else:
            st.session_state['output_format'] = output

# Step 2: once a format has been extracted, show it and offer to generate the
# scraping function for it.
if 'output_format' in st.session_state:
    st.code(st.session_state['output_format'], language="json")

    if st.button("Generate the code"):
        # The assistant is only bound once an API key has been supplied. Check
        # for it explicitly instead of catching NameError around the call,
        # which would also hide unrelated NameErrors raised inside the
        # assistant itself.
        assistant = globals().get('gpt_assistant')
        if assistant is None:
            st.write("Complete the API key field")
        else:
            try:
                python_code = assistant.chain_code_generator(st.session_state['output_format'], html_content)
            except AuthenticationError:
                st.write("Invalid API key")
            else:
                st.session_state['code_generated'] = python_code
                # Executable variant: the generated source followed by a call
                # that stores the extraction output in `result`.
                st.session_state['code_generated_exec'] = python_code + "\nresult = extract_info(html_data)"


# Step 3: show the generated function and let the user try it on a full page.
if 'code_generated' in st.session_state:
    st.write("Here is your python function:")
    st.code(st.session_state['code_generated'], language="python")
    full_content = st.text_input("Paste your complete HTML here:")
    if full_content and st.button("Test the code"):
        # SECURITY: this executes model-generated source code inside the app
        # process. Only run the app in an environment where that is acceptable.
        # The code runs in its own namespace so it cannot overwrite this
        # script's variables; it reads `html_data` and is expected to leave
        # its output in `result`.
        namespace = {'html_data': full_content, 'result': None}
        try:
            exec(st.session_state['code_generated_exec'], namespace)
        except Exception as exc:
            # Generated code can fail in arbitrary ways (syntax errors, missing
            # modules, parsing errors); report it instead of aborting the run.
            st.write("error extracting data")
            st.exception(exc)
        else:
            result = namespace.get('result')
            if result:
                st.write("data extracted successfully")
                # show data in table
                st.table(result)
            else:
                st.write("error extracting data")