Spaces:
Runtime error
Runtime error
File size: 3,642 Bytes
3505899 d6579b5 3505899 d6579b5 3505899 f93d314 3505899 490445b 3505899 490445b 3505899 6678ed6 2c475a1 6678ed6 2c475a1 d770ec6 2c475a1 6678ed6 2c475a1 d770ec6 2c475a1 d770ec6 2c475a1 d770ec6 2c475a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
from AssistantService import GPTAssistant
from openai.error import AuthenticationError
import streamlit as st
import configparser
import os
# Read optional local settings. ConfigParser.read() skips files it cannot
# open, so a missing config.ini simply leaves the key empty.
config = configparser.ConfigParser()
config.read('config.ini')
# Bind the key unconditionally so later code never sees an undefined name;
# an empty string means no key was configured locally.
assistant_api_key = config.get('DEFAULT', 'API-KEY', fallback='')
# LangChain tracing settings, exported through the process environment.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
# The API key and the project name are looked up in Streamlit's secrets
# under the same names as the environment variables they populate.
for _secret_name in ("LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT"):
    os.environ[_secret_name] = st.secrets[_secret_name]
# Page header and a short description of what the app does.
st.title("Web Scraping Assistant")
st.write("This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you.")
st.write("Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)")

if not assistant_api_key:
    # No key came from config.ini, so ask for one. The field is masked
    # because an API key is a secret and should not be readable on screen.
    assistant_api_key = st.text_input("Paste your API key here:", type="password")
    # While the field is empty, gpt_assistant is deliberately left undefined:
    # the handlers further down catch the resulting NameError and prompt
    # the user to complete the key field.
    if assistant_api_key:
        gpt_assistant = GPTAssistant(assistant_api_key)
else:
    gpt_assistant = GPTAssistant(assistant_api_key)
# Step 1: take an HTML snippet and ask the assistant for its data format.
html_content = st.text_input("Paste your piece of HTML here:")
extract_button = st.button("Extract data format")

if html_content and extract_button:
    try:
        output = gpt_assistant.chain_response_format(html_content)
    except NameError:
        # gpt_assistant is never bound when no API key has been supplied.
        st.write("Complete the API key field")
    except AuthenticationError:
        st.write("Invalid API key")
    else:
        # Keep the result in session state so it survives Streamlit reruns.
        st.session_state['output_format'] = output
# Step 2: once a data format is stored, show it and offer code generation.
if 'output_format' in st.session_state:
    output_format = st.code(st.session_state['output_format'], language="json")

    if st.button("Generate the code"):
        try:
            python_code = gpt_assistant.chain_code_generator(
                st.session_state['output_format'],
                html_content,
            )
        except NameError:
            # gpt_assistant is never bound when no API key has been supplied.
            st.write("Complete the API key field")
        except AuthenticationError:
            st.write("Invalid API key")
        else:
            st.session_state['code_generated'] = python_code
            # The executable variant appends a call that stores the
            # extraction output in a variable named `result`.
            st.session_state['code_generated_exec'] = python_code + "\nresult = extract_info(html_data)"
# Step 3: show the generated function and let the user try it on full HTML.
if 'code_generated' in st.session_state:
    python_function_label = st.write("Here is your python function:")
    code_generated = st.code(st.session_state['code_generated'], language="python")
    full_content = st.text_input("Paste your complete HTML here:")
    test_code = st.button("Test the code")
    if full_content and test_code:
        # SECURITY: this executes model-generated code inside the app process.
        # exec() is not a sandbox; the code can do anything this process can.
        # Running it in its own namespace only stops it from overwriting the
        # app's module globals (st, gpt_assistant, ...).
        exec_namespace = {'html_data': full_content, 'result': None}
        result = None
        try:
            exec(st.session_state['code_generated_exec'], exec_namespace)
            result = exec_namespace.get('result')
        except Exception as exc:
            # The generated code is arbitrary and may fail in any way;
            # report the failure in the page instead of crashing the app.
            st.write(f"The generated code raised {type(exc).__name__}: {exc}")
        if result:
            st.write("data extracted successfully")
            # show data in table
            st.table(result)
        else:
            st.write("error extracting data")
# Collapsible usage guide with an optional example snippet.
with st.expander(label="How to use this app"):
    st.write("1. Paste the html code of your target element in the first text box and press \"Enter\"")
    if st.button("Show example"):
        st.text_area("Example", value='<li><div class="product"> <h3 class="title">Product 1</h3> <p class="description">This is the description of the product 1</p> <span class="price">10.00</span> </div></li>')
        # st.button returns a plain bool, so there is no widget object to
        # disable. Clicking this button reruns the script; on that rerun
        # "Show example" is False again and the example is not rendered.
        st.button("Close example")
    st.write("2. Click on the button 'Extract data format'")
    st.write("3. Click on the button 'Generate the code'")
    st.write("4. Paste the complete html code in the last text box to test the auto generated code")
    st.write("5. Copy the code and include it in your own projects")
|