Spaces:
Runtime error
Runtime error
new changes
Browse files- .DS_Store +0 -0
- Responses/amazon.json +0 -0
- Responses/facebook.json +0 -0
- Responses/google.json +0 -0
- Responses/semrush.json +0 -0
- Responses/upthrust.json +0 -0
- __pycache__/ask_questions.cpython-311.pyc +0 -0
- __pycache__/pagespeed.cpython-311.pyc +0 -0
- app.py +46 -0
- ask_questions.py +92 -0
- pagespeed.py +74 -0
- processed/embeddings.csv +0 -0
- processed/scraped.csv +0 -0
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
Responses/amazon.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Responses/facebook.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Responses/google.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Responses/semrush.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Responses/upthrust.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
__pycache__/ask_questions.cpython-311.pyc
ADDED
Binary file (3.83 kB). View file
|
|
__pycache__/pagespeed.cpython-311.pyc
ADDED
Binary file (4.38 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
from pagespeed import generate_response, process_data
from ask_questions import answer_question
import pandas as pd
import numpy as np

# Load the precomputed document embeddings once at startup. The 'embeddings'
# column is stored in the CSV as a stringified list, so parse each cell back
# into a numpy array for similarity search.
# (Dead `df = pd.DataFrame()` removed — it was immediately overwritten.)
df = pd.read_csv('processed/embeddings.csv', index_col=0)
df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)

# Persist the "report requested" flag across Streamlit reruns so the report
# stays visible after a nested "Fix Issue" button click triggers a rerun.
if "button" not in st.session_state:
    st.session_state.button = False

st.title("PageSpeed Insights")

st.write("Enter a URL to get a PageSpeed Insights report")

# Get the URL from the user
url = st.text_input("URL", "https://www.google.com")

if st.button("Get Report") or st.session_state.button:
    with st.spinner(text="Collecting data..."):
        st.session_state.button = True
        # Fetch the raw PageSpeed Insights payload and distil it into a list
        # of {"title", "description", "item", "type"} issue dicts.
        data = generate_response(url)
        issues = process_data(data)

    # One expander per issue; inside each, a "Fix Issue" button asks the
    # language model for a concrete fix.
    for index, issue in enumerate(issues):
        title = issue["title"]
        desc = issue["description"]
        item = issue["item"]

        with st.expander(title):
            st.write(desc)
            st.write(item)
            if st.button("Fix Issue", key=index):
                # BUG FIX: the full prompt (title + description + offending
                # item) was built but then discarded — only the bare
                # description was sent, so the model never saw the item it
                # was asked to fix. Pass the composed question instead.
                question = f"Title: {title}\nDescription: {desc}\nItem: {item}"
                st.write(answer_question(df, question=question, debug=False))
|
ask_questions.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import re
|
3 |
+
import urllib.request
|
4 |
+
from bs4 import BeautifulSoup
|
5 |
+
from collections import deque
|
6 |
+
from html.parser import HTMLParser
|
7 |
+
from urllib.parse import urlparse
|
8 |
+
import os
|
9 |
+
import pandas as pd
|
10 |
+
import tiktoken
|
11 |
+
import openai
|
12 |
+
import numpy as np
|
13 |
+
from openai.embeddings_utils import distances_from_embeddings, cosine_similarity
|
14 |
+
import streamlit as st
|
15 |
+
openai.api_key = st.secrets["openai_api_key"]
|
16 |
+
|
17 |
+
|
18 |
+
df=pd.read_csv('processed/embeddings.csv', index_col=0)
|
19 |
+
df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)
|
20 |
+
|
21 |
+
def create_context(
    question, df, max_len=1800, size="ada"
):
    """Build a retrieval context for *question* from the embeddings dataframe.

    Embeds the question with OpenAI's text-embedding-ada-002, ranks every row
    of *df* by cosine distance to that embedding, then concatenates the
    closest texts (separated by a "###" delimiter) until the approximate
    token budget *max_len* is reached.

    Args:
        question: Natural-language question to embed and match against.
        df: DataFrame with 'embeddings' (np.ndarray), 'text', and 'n_tokens'
            columns. NOTE(review): mutated in place — a 'distances' column is
            (re)written on every call.
        max_len: Approximate token budget for the returned context.
        size: Unused here; presumably selects the embedding model size —
            TODO confirm intent.

    Returns:
        str: the selected texts joined by the delimiter, possibly empty if
        even the closest text exceeds the budget.
    """

    # Get the embeddings for the question (live network call to OpenAI;
    # legacy openai<1.0 Embedding API).
    q_embeddings = openai.Embedding.create(input=question, engine='text-embedding-ada-002')['data'][0]['embedding']

    # Get the distances from the embeddings (cosine distance per row).
    df['distances'] = distances_from_embeddings(q_embeddings, df['embeddings'].values, distance_metric='cosine')

    returns = []
    cur_len = 0

    # Sort by distance and add the text to the context until the context is too long
    for i, row in df.sort_values('distances', ascending=True).iterrows():

        # Add the length of the text to the current length
        # (+4 presumably accounts for the delimiter tokens — TODO confirm)
        cur_len += row['n_tokens'] + 4

        # If the context is too long, break
        if cur_len > max_len:
            break

        # Else add it to the text that is being returned
        returns.append(row["text"])

    # Return the context
    return "\n\n###\n\n".join(returns)
|
53 |
+
|
54 |
+
def answer_question(
    df,
    model="text-davinci-003",
    question="Am I allowed to publish model outputs to Twitter, without a human review?",
    max_len=3000,
    size="ada",
    debug=False,
    max_tokens=500,
    stop_sequence=None
):
    """Answer *question* using the most similar context from *df*'s texts.

    Retrieves a context via create_context(), embeds it in an SEO-analyzer
    prompt, and queries the legacy OpenAI Completion API.

    Args:
        df: Embeddings DataFrame, forwarded to create_context().
        model: Completion model name (legacy openai<1.0 API).
        question: The issue text to answer (default is leftover sample text
            from the OpenAI tutorial this is based on).
        max_len: Token budget forwarded to create_context().
        size: Forwarded to create_context() (unused there).
        debug: When True, print the retrieved context before querying.
        max_tokens: Completion length cap.
        stop_sequence: Optional stop sequence(s) passed to the API.

    Returns:
        str: the model's answer, stripped; or "" on any API error
        (errors are printed, not raised — best-effort by design).
    """
    context = create_context(
        question,
        df,
        max_len=max_len,
        size=size,
    )
    # If debug, print the retrieved context that will be sent to the model
    if debug:
        print("Context:\n" + context)
        print("\n\n")

    try:
        # Create a completion using the question and context
        response = openai.Completion.create(
            prompt=f"You are an SEO anaylzer. \nYou will be given:\n An issue, \nthe description of the issue\nthe items that cause the issue.\n Describe the Issue. Show the appropriate solution to the issue. Implement the solution to the issue and show the fix on the given items in the issue. Give only the fix to the item given in the issue. \nContext: {context}\n\n---\n\nIssue: {question}",
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=stop_sequence,
            model=model
        )
        return response["choices"][0]["text"].strip()
    except Exception as e:
        # Best-effort: swallow API errors and return an empty answer so the
        # UI keeps working.
        print(e)
        return ""
|
92 |
+
|
pagespeed.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
import re
|
5 |
+
import pandas as pd
|
6 |
+
from ask_questions import answer_question
|
7 |
+
import numpy as np
|
8 |
+
import streamlit as st
|
9 |
+
|
10 |
+
df = pd.DataFrame()
|
11 |
+
|
12 |
+
def extract_url_from_string(string):
    """Return the first http(s) URL found in *string*.

    Args:
        string: Arbitrary text expected to contain an http:// or https:// URL.

    Returns:
        str: the matched URL, up to (not including) the first whitespace.

    Raises:
        AttributeError: if the string contains no URL (re.search returns
            None), matching the original behavior.
    """
    # Raw string literal: "\s" is an invalid escape in a plain string
    # (SyntaxWarning on modern CPython) — regex patterns should be raw.
    return re.search(r"(?P<url>https?://[^\s]+)", string).group("url")
|
14 |
+
|
15 |
+
def process_data(data):
    """Extract actionable issues from a PageSpeed Insights API response.

    An audit is kept when it is applicable, not merely informative, has a
    score that is present and below a perfect 1, and carries a 'details'
    payload.

    Cleanups vs. the original: removed unused locals (audit titles,
    score-display list, the per-call re-read of processed/embeddings.csv,
    an unused prompt string) and debug prints; `!= None` -> `is not None`;
    guarded against audits whose 'details' lists no items (previously an
    IndexError).

    Args:
        data: Decoded JSON dict from the PageSpeed Insights v5 API; must
            contain data["lighthouseResult"]["audits"].

    Returns:
        list[dict]: one dict per failing audit with keys "title",
        "description", "item" (first offending item, or None when the audit
        reports no items) and "type" (the details renderer type).
    """
    audits = list(data["lighthouseResult"]["audits"].values())

    issues = []
    for audit in audits:
        if (
            audit["scoreDisplayMode"] not in ("notApplicable", "informative")
            and audit["score"] is not None
            and audit["score"] != 1
            and "details" in audit
        ):
            items = audit["details"].get("items", [])
            issues.append({
                "title": audit["title"],
                "description": audit["description"],
                # Some failing audits list no concrete items; report None
                # instead of crashing on items[0].
                "item": items[0] if items else None,
                "type": audit["details"]["type"],
            })
    return issues
|
38 |
+
|
39 |
+
|
40 |
+
def generate_response(website_url, url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed", api_key=st.secrets["page_speed_api_key"]):
    """Call the Google PageSpeed Insights v5 API for *website_url*.

    Args:
        website_url: Fully-qualified URL to audit, e.g. "https://example.com".
        url: PageSpeed Insights endpoint to query.
        api_key: Google API key. NOTE(review): this default is evaluated once
            at import time, so the Streamlit secret must exist whenever this
            module is imported, even if a key is passed explicitly.

    Returns:
        dict: the decoded JSON response on success, or None (implicitly)
        when the request fails — the error is printed, not raised.
    """
    print("Website: " + website_url)
    print()
    # NOTE(review): assumes a host shaped like "https://www.example.com";
    # without a leading "www." this yields the TLD, and a URL with no "//"
    # raises IndexError — confirm inputs. `name` is currently unused because
    # the response-caching code below is commented out.
    name = website_url.split("//")[1].split(".")[1] # Get the name of the website

    params = {
        "url": website_url,
        "key": api_key,
        "category": ["performance", "accessibility", "best_practices", "seo"]
    }

    try:
        #output_file_path = f"Responses/{name}.json"
        #if not os.path.exists(output_file_path):

        response = requests.get(url, params=params)
        response.raise_for_status() # Check for any request errors

        data = response.json()
        """
        with open(output_file_path, "w") as output_file:
            json.dump(data, output_file, indent=4)
        else:
            with open(output_file_path) as output_file:
                data = json.load(output_file)"""

        # Process the data as needed
        return data

    except requests.exceptions.RequestException as e:
        # Best-effort: log and fall through, returning None to the caller.
        print("Error:", e)
|
71 |
+
#for i in list_of_urls:
|
72 |
+
# data = generate_response(i)
|
73 |
+
# process_data(data)
|
74 |
+
#https://chat.openai.com/share/71d7a128-b56d-4368-9eee-beda874e4200
|
processed/embeddings.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
processed/scraped.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|