"""Streamlit page that turns pasted HTML into a generated scraping function.

The script runs top to bottom on every interaction and drives three steps:

1. Send a piece of HTML to ``GPTAssistant.chain_response_format`` and show
   the returned output format.
2. Send that format plus the HTML to ``GPTAssistant.chain_code_generator``
   and show the returned Python code.
3. Execute the generated code against a complete HTML document and display
   whatever it leaves in ``result``.

Intermediate values are kept in ``st.session_state`` under the keys
``output_format``, ``code_generated`` and ``code_generated_exec``.
"""
import configparser
import os

import streamlit as st
from openai.error import AuthenticationError

from AssistantService import GPTAssistant

MISSING_KEY_MESSAGE = "Complete the API key field"
INVALID_KEY_MESSAGE = "Invalid API key"

# --- Configuration -----------------------------------------------------------
config = configparser.ConfigParser()
config.read('config.ini')
# ``fallback`` keeps the name bound (to an empty string) when the file or the
# option is missing, so the checks below never depend on an unbound variable.
assistant_api_key = config.get('DEFAULT', 'API-KEY', fallback='')

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = st.secrets["LANGCHAIN_API_KEY"]
os.environ["LANGCHAIN_PROJECT"] = st.secrets["LANGCHAIN_PROJECT"]

# --- Introduction ------------------------------------------------------------
st.write(
    "This app helps you to extract data from HTML code using web scraping. "
    "It uses GPT-3.5-turbo to generate the code for you. \n "
    "*Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)*"
)

with st.expander(label="Check out the video demo"):
    st.video("https://www.youtube.com/watch?v=_zeCun4OlCc")

info_text = """ **Quick start** \n Fill the input with the HTML code you want to extract data from Example below: """
st.write(info_text)
st.image("https://j.gifs.com/gpqvPl.gif")

# --- Assistant setup ---------------------------------------------------------
# Ask for the key only when config.ini did not provide one.
if assistant_api_key == '':
    assistant_api_key = st.text_input("Paste your API key here:")

# ``None`` is an explicit "no key yet" marker. The later steps test for it
# instead of catching NameError, which would also hide genuine NameErrors
# raised inside the assistant calls.
gpt_assistant = GPTAssistant(assistant_api_key) if assistant_api_key else None

# --- Step 1: derive the output format from a piece of HTML -------------------
html_content = st.text_input("Paste your piece of HTML here:")
extract_button = st.button("Extract data format")

if html_content and extract_button:
    if gpt_assistant is None:
        st.write(MISSING_KEY_MESSAGE)
    else:
        try:
            output = gpt_assistant.chain_response_format(html_content)
        except AuthenticationError:
            st.write(INVALID_KEY_MESSAGE)
        else:
            st.session_state['output_format'] = output

# --- Step 2: generate the extraction code ------------------------------------
if 'output_format' in st.session_state:
    st.code(st.session_state['output_format'], language="json")

    if st.button("Generate the code"):
        if gpt_assistant is None:
            st.write(MISSING_KEY_MESSAGE)
        else:
            try:
                python_code = gpt_assistant.chain_code_generator(
                    st.session_state['output_format'], html_content
                )
            except AuthenticationError:
                st.write(INVALID_KEY_MESSAGE)
            else:
                st.session_state['code_generated'] = python_code
                # The appended line calls the generated function so that the
                # test step can read its return value from ``result``.
                st.session_state['code_generated_exec'] = (
                    python_code + "\nresult = extract_info(html_data)"
                )

# --- Step 3: run the generated code against a full document ------------------
if 'code_generated' in st.session_state:
    st.write("Here is your python function:")
    st.code(st.session_state['code_generated'], language="python")
    full_content = st.text_input("Paste your complete HTML here:")
    test_code = st.button("Test the code")

    if full_content and test_code:
        # SECURITY: this executes model-generated code inside the app process.
        # It runs in its own namespace so it cannot rebind this script's names,
        # but that is NOT a sandbox; the code still has full interpreter access.
        exec_namespace = {"html_data": full_content}
        result = None
        try:
            exec(st.session_state['code_generated_exec'], exec_namespace)
        except Exception as exc:
            # Generated code can fail in arbitrary ways; show the error and
            # fall through to the failure message below.
            st.exception(exc)
        else:
            result = exec_namespace.get("result")

        if result:
            st.write("data extracted successfully")
            # show data in table
            st.table(result)
        else:
            st.write("error extracting data")