Spaces:
Running
Running
Add Hacker News app
Browse files- .gitignore +2 -0
- Dockerfile +59 -0
- README.md +2 -1
- hacker-news-social-listener/.gitignore +2 -0
- hacker-news-social-listener/.wf/components-page-0-c0f99a9e-5004-4e75-a6c6-36f17490b134.jsonl +34 -0
- hacker-news-social-listener/.wf/components-root.jsonl +1 -0
- hacker-news-social-listener/.wf/components-workflows_root.jsonl +1 -0
- hacker-news-social-listener/.wf/metadata.json +3 -0
- hacker-news-social-listener/README.md +45 -0
- hacker-news-social-listener/main.py +329 -0
- hacker-news-social-listener/poetry.lock +0 -0
- hacker-news-social-listener/prompts.py +34 -0
- hacker-news-social-listener/static/README.md +8 -0
- hacker-news-social-listener/static/custom.css +17 -0
- hacker-news-social-listener/static/custom.js +5 -0
- hacker-news-social-listener/static/favicon.png +0 -0
- pyproject.toml +23 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.DS_Store
|
2 |
+
__pycache__
|
Dockerfile
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Build stage: export the Poetry dependencies and pre-build them as wheels so
# the runtime image needs neither Poetry nor a compiler toolchain.
# Stage names are lowercase so they match the `COPY --from=build` reference
# below and follow Docker's stage-name casing convention.
FROM python:3.11-slim-buster AS build

# Set environment variables for Python and Poetry
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_CREATE=false \
    POETRY_VERSION=1.7.1

# Set the working directory in the container
WORKDIR /app

# Copy the dependencies file to the working directory
COPY ./pyproject.toml /app/

# Update, install build dependencies, export the dependency list with Poetry
# and build a wheel for every requirement into /app/wheels
RUN apt-get update && \
    apt-get install -y gcc g++ unixodbc-dev && \
    pip install "poetry==$POETRY_VERSION" && \
    poetry export --without-hashes --format requirements.txt --output requirements.txt && \
    python3 -m pip wheel --no-cache-dir --no-deps -w /app/wheels -r requirements.txt

# Runtime stage
FROM python:3.11-slim-buster AS run

# Point HOME at the non-root user's home directory and put that user's
# local bin directory on PATH
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Create a non-root user
RUN useradd -m -u 1000 user

# Switch to the non-root user
USER user

# Copy wheel files from the build stage
COPY --from=build /app/wheels $HOME/app/wheels

# Set the working directory to where the wheels are
WORKDIR $HOME/app/wheels

# Install the wheel files
RUN pip3 --no-cache-dir install *.whl

# Change app name here to copy the application files to the working directory
COPY --chown=user ./hacker-news-social-listener $HOME/app

# Set the working directory to the application files
WORKDIR $HOME/app

# Specify the command to run the application
ENTRYPOINT [ "writer", "run" ]

# Expose the port the app runs on
EXPOSE 8080

# Default arguments appended to the ENTRYPOINT: serve the app in the current
# directory on all interfaces, port 8080
CMD [ ".", "--port", "8080", "--host", "0.0.0.0" ]
|
README.md
CHANGED
@@ -1,11 +1,12 @@
|
|
1 |
---
|
2 |
title: Hacker News Listener
|
3 |
-
emoji:
|
4 |
colorFrom: purple
|
5 |
colorTo: red
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
short_description: Navigate and analyze Hacker News posts and comments.
|
|
|
9 |
---
|
10 |
|
11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: Hacker News Listener
|
3 |
+
emoji: 🎧
|
4 |
colorFrom: purple
|
5 |
colorTo: red
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
short_description: Navigate and analyze Hacker News posts and comments.
|
9 |
+
app_port: 8080
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
hacker-news-social-listener/.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
static/hackernews_comments.csv
|
2 |
+
static/hackernews_posts.csv
|
hacker-news-social-listener/.wf/components-page-0-c0f99a9e-5004-4e75-a6c6-36f17490b134.jsonl
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"id": "c0f99a9e-5004-4e75-a6c6-36f17490b134", "type": "page", "content": {"pageMode": "compact"}, "handlers": {}, "isCodeManaged": false, "parentId": "root", "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
|
2 |
+
{"id": "bebc5fe9-63a7-46a7-b0fa-62303555cfaf", "type": "header", "content": {"text": "Hacker News Listener"}, "handlers": {}, "isCodeManaged": false, "parentId": "c0f99a9e-5004-4e75-a6c6-36f17490b134", "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
|
3 |
+
{"id": "m7luxumzscv65i09", "type": "tabs", "content": {}, "handlers": {}, "isCodeManaged": false, "parentId": "c0f99a9e-5004-4e75-a6c6-36f17490b134", "position": 1, "visible": {"binding": "", "expression": true, "reversed": false}}
|
4 |
+
{"id": "lon4vs20gd3myh7e", "type": "tab", "content": {"name": "Setup"}, "handlers": {}, "isCodeManaged": false, "parentId": "m7luxumzscv65i09", "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
|
5 |
+
{"id": "xlc1wtcwu1g2i07p", "type": "heading", "content": {"text": "Navigate and analyze Hacker News posts and comments using Writer graph-based RAG and Palmyra X 004."}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 0}
|
6 |
+
{"id": "62r9auy895datfnp", "type": "message", "content": {"message": "@{message_setup}"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 1, "visible": {"binding": "message_setup_vis", "expression": "custom", "reversed": false}}
|
7 |
+
{"id": "mmh30t1tiyv5mi9s", "type": "section", "content": {"containerBackgroundColor": "#BFCBFF", "isCollapsible": "yes", "title": "\ud83d\udd0e Pull latest N posts"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 2}
|
8 |
+
{"id": "hpeff7ha6v27zsk8", "type": "text", "content": {"text": "**Pull up to 500 Hacker News stories, with or without comments.** Data from the Hacker News API is sent to Writer\u2019s graph-based RAG system, known as a Knowledge Graph. This is accessed dynamically by an LLM through the Writer RAG tool.", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "mmh30t1tiyv5mi9s", "position": 0}
|
9 |
+
{"id": "up5ofq0drv233umy", "type": "section", "content": {"containerBackgroundColor": "#FFD8CD", "isCollapsible": "yes", "title": "\ud83d\ude4b\u200d\u2640\ufe0f Ask questions"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 3}
|
10 |
+
{"id": "9tlgkbsuw9wgpocp", "type": "text", "content": {"text": "**Explore trending topics, recent product releases, open-source projects, or job listings.** The LLM, Palmyra X 004, interprets user queries, calls the Writer RAG tool to retrieve relevant Hacker News posts, and generates responses.", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "up5ofq0drv233umy", "position": 0}
|
11 |
+
{"id": "tmi69i8jt7u50b99", "type": "section", "content": {"containerBackgroundColor": "#D4B2F7", "isCollapsible": "yes", "title": "\ud83d\udcc4 Generate a report"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 4}
|
12 |
+
{"id": "taarom6dd93ryw1k", "type": "text", "content": {"text": "**Automatically summarize the most popular topics in one click.** All posts are loaded into Palmyra X 004's 128k context window, where they are analyzed to produce a structured report highlighting key trends and insights.", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "tmi69i8jt7u50b99", "position": 0}
|
13 |
+
{"id": "5myjrizpz5uxilif", "type": "text", "content": {"text": "#### \ud83c\udfd7\ufe0f <a href=\"https://writer.com/engineering/rag-tool/\" target=\"_blank\">Build your own RAG app</a>. | \ud83d\udcbb <a href=\"https://github.com/writer/framework-tutorials/tree/main/hacker-news-social-listener\">Get the code.</a>", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 5, "visible": {"binding": "", "expression": true, "reversed": false}}
|
14 |
+
{"id": "wcljpgx0f50y5kac", "type": "sliderinput", "binding": {"eventType": "wf-number-change", "stateRef": "fetch_limit"}, "content": {"label": "Pull the last N posts", "maxValue": "500", "minValue": "10", "stepSize": "10"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 6, "visible": {"binding": "", "expression": true, "reversed": false}}
|
15 |
+
{"id": "zw8nru7f036fdubh", "type": "horizontalstack", "content": {}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 7}
|
16 |
+
{"id": "tk9h8c6f6kf44x7z", "type": "switchinput", "binding": {"eventType": "wf-toggle", "stateRef": "allow_comments"}, "content": {"label": "Include comments"}, "handlers": {}, "isCodeManaged": false, "parentId": "zw8nru7f036fdubh", "position": 0}
|
17 |
+
{"id": "cwgi42r2nxzja4gg", "type": "button", "content": {"text": "Fetch posts"}, "handlers": {"wf-click": "fetch_posts"}, "isCodeManaged": false, "parentId": "zw8nru7f036fdubh", "position": 1}
|
18 |
+
{"id": "yo212stq5so5c5au", "type": "tab", "content": {"name": "Raw data"}, "handlers": {}, "isCodeManaged": false, "parentId": "m7luxumzscv65i09", "position": 1, "visible": {"binding": "", "expression": true, "reversed": false}}
|
19 |
+
{"id": "7gzo3w8vnbqvld8r", "type": "heading", "content": {"text": "Scraped posts"}, "handlers": {}, "isCodeManaged": false, "parentId": "yo212stq5so5c5au", "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
|
20 |
+
{"id": "46q42e5z5jninm4o", "type": "dataframe", "content": {"dataframe": "@{posts}", "enableDownload": "yes", "enableSearch": "yes", "showIndex": "no", "useMarkdown": "yes", "wrapText": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "yo212stq5so5c5au", "position": 1, "visible": {"binding": "", "expression": true, "reversed": false}}
|
21 |
+
{"id": "2d3gfh1tavmi1iab", "type": "heading", "content": {"text": "Scraped comments"}, "handlers": {}, "isCodeManaged": false, "parentId": "yo212stq5so5c5au", "position": 2, "visible": {"binding": "allow_comments", "expression": "custom", "reversed": false}}
|
22 |
+
{"id": "q0275uf0oldds8v8", "type": "dataframe", "content": {"dataframe": "@{comments}", "enableDownload": "yes", "enableSearch": "yes", "showIndex": "no", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "yo212stq5so5c5au", "position": 3, "visible": {"binding": "allow_comments", "expression": "custom", "reversed": false}}
|
23 |
+
{"id": "8eghbz6jlckh3zmd", "type": "tab", "content": {"name": "Chat with Hacker News"}, "handlers": {}, "isCodeManaged": false, "parentId": "m7luxumzscv65i09", "position": 2, "visible": {"binding": "", "expression": true, "reversed": false}}
|
24 |
+
{"id": "rpi88dvxmlxr0qd9", "type": "columns", "content": {}, "handlers": {}, "isCodeManaged": false, "parentId": "8eghbz6jlckh3zmd", "position": 0}
|
25 |
+
{"id": "rosw32keaejygiir", "type": "column", "content": {"width": "13"}, "handlers": {}, "isCodeManaged": false, "parentId": "rpi88dvxmlxr0qd9", "position": 0}
|
26 |
+
{"id": "crm2bdbrjclid4k4", "type": "chatbot", "content": {"conversation": "@{conversation}", "useMarkdown": "yes"}, "handlers": {"wf-chatbot-message": "message_handler"}, "isCodeManaged": false, "parentId": "rosw32keaejygiir", "position": 0}
|
27 |
+
{"id": "814lb9dktd2e1ye3", "type": "horizontalstack", "content": {"contentHAlign": "start"}, "handlers": {}, "isCodeManaged": false, "parentId": "rosw32keaejygiir", "position": 1}
|
28 |
+
{"id": "t171dwz5muor2n9x", "type": "text", "content": {"alignment": "center", "text": "@{contributing_sources_button_text}", "useMarkdown": "yes"}, "handlers": {"wf-click": "contributing_sources_change_vis"}, "isCodeManaged": false, "parentId": "814lb9dktd2e1ye3", "position": 0}
|
29 |
+
{"id": "contributed_sources", "type": "column", "content": {"cssClasses": "files-list", "title": "Contributing sources", "width": "7"}, "handlers": {}, "isCodeManaged": false, "parentId": "rpi88dvxmlxr0qd9", "position": 1, "visible": {"binding": "contributing_sources_vis", "expression": "custom", "reversed": false}}
|
30 |
+
{"id": "erkqharzgysk5s3n", "type": "tab", "content": {"name": "Generate trend report"}, "handlers": {}, "isCodeManaged": false, "parentId": "m7luxumzscv65i09", "position": 3, "visible": {"binding": "", "expression": true, "reversed": false}}
|
31 |
+
{"id": "vzbrrx0dlp3skcn2", "type": "message", "content": {"message": "@{message_report}"}, "handlers": {}, "isCodeManaged": false, "parentId": "erkqharzgysk5s3n", "position": 0, "visible": {"binding": "message_report_vis", "expression": "custom", "reversed": false}}
|
32 |
+
{"id": "osvuzxnivfs3qrut", "type": "text", "content": {"text": "@{prepared_report}", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "erkqharzgysk5s3n", "position": 1}
|
33 |
+
{"id": "w2hg99rv80b12tpt", "type": "button", "content": {"text": "Generate report"}, "handlers": {"wf-click": "run_report"}, "isCodeManaged": false, "parentId": "erkqharzgysk5s3n", "position": 2, "visible": {"binding": "", "expression": true, "reversed": false}}
|
34 |
+
{"id": "6ugh4p7vlu1j7fvp", "type": "text", "content": {"alignment": "center", "text": "Made with \u2764\ufe0f by <a href=\"http://www.writer.com\">Writer</a>.", "useMarkdown": "yes"}, "handlers": {}, "parentId": "c0f99a9e-5004-4e75-a6c6-36f17490b134", "position": 2}
|
hacker-news-social-listener/.wf/components-root.jsonl
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"id": "root", "type": "root", "content": {"appName": "Hacker News Listener"}, "handlers": {}, "isCodeManaged": false, "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
|
hacker-news-social-listener/.wf/components-workflows_root.jsonl
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"id": "workflows_root", "type": "workflows_root", "content": {}, "handlers": {}, "isCodeManaged": false, "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
|
hacker-news-social-listener/.wf/metadata.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"writer_version": "0.8.2"
|
3 |
+
}
|
hacker-news-social-listener/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Hacker News Listener
|
2 |
+
This application is built using the Writer Framework and is designed to scrape the top posts and comments from Hacker News. It processes the data, uploads it to a Writer Knowledge Graph for further analysis, and generates AI-powered insights based on the content of the posts.
|
3 |
+
|
4 |
+
## Usage
|
5 |
+
|
6 |
+
1. Select the number of items you wish to process.
|
7 |
+
2. The application will generate raw data with analysis of posts and comments.
|
8 |
+
3. Ask specific questions using the Knowledge Graph chat.
|
9 |
+
4. Generate a detailed report from the processed data using the Prepared Report feature.
|
10 |
+
|
11 |
+
## Running the application
|
12 |
+
First, ensure you have Poetry installed. Then, in the project directory, install the dependencies by running:
|
13 |
+
|
14 |
+
```sh
|
15 |
+
poetry install
|
16 |
+
```
|
17 |
+
|
18 |
+
To build this application, you'll need to sign up for [Writer AI Studio](https://app.writer.com/aistudio/signup?utm_campaign=devrel) and create a new API key and Knowledge Graph. To pass your API key and Knowledge Graph ID to the Writer Framework, set the environment variables `WRITER_API_KEY` and `GRAPH_ID`:
|
19 |
+
```sh
|
20 |
+
export WRITER_API_KEY=your-api-key
|
21 |
+
```
|
22 |
+
```sh
|
23 |
+
export GRAPH_ID=your-graph-id
|
24 |
+
```
|
25 |
+
|
26 |
+
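You can also set `WRITER_API_KEY` and `GRAPH_ID` in the `.env` file. The application additionally reads the Hacker News API base URL from the `HACKERNEWS_API_URL` environment variable (for example, `https://hacker-news.firebaseio.com/v0`), which can be set the same way.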
|
27 |
+
|
28 |
+
To make changes or edit the application, navigate to the root folder and use the following command:
|
29 |
+
|
30 |
+
|
31 |
+
```sh
|
32 |
+
writer edit .
|
33 |
+
```
|
34 |
+
|
35 |
+
Once you're ready to run the application, execute:
|
36 |
+
|
37 |
+
```sh
|
38 |
+
writer run .
|
39 |
+
```
|
40 |
+
|
41 |
+
To learn more, check out the [full documentation for Writer Framework](https://dev.writer.com/framework/introduction).
|
42 |
+
|
43 |
+
## About Writer
|
44 |
+
|
45 |
+
Writer is the full-stack generative AI platform for enterprises. Quickly and easily build and deploy generative AI apps with a suite of developer tools fully integrated with our platform of LLMs, graph-based RAG tools, AI guardrails, and more. Learn more at [writer.com](https://www.writer.com?utm_source=github&utm_medium=readme&utm_campaign=framework).
|
hacker-news-social-listener/main.py
ADDED
@@ -0,0 +1,329 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import os
|
3 |
+
from datetime import datetime
|
4 |
+
from pathlib import Path
|
5 |
+
from typing import Any, List, Optional
|
6 |
+
|
7 |
+
import aiohttp
|
8 |
+
import pandas as pd
|
9 |
+
import requests
|
10 |
+
import writer as wf
|
11 |
+
from aiohttp import ClientSession
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
from prompts import report_prompt
|
14 |
+
from writer import WriterState
|
15 |
+
from writer.ai import (
|
16 |
+
Conversation,
|
17 |
+
File,
|
18 |
+
list_files,
|
19 |
+
retrieve_file,
|
20 |
+
retrieve_graph,
|
21 |
+
upload_file,
|
22 |
+
)
|
23 |
+
|
24 |
+
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# ID of the Knowledge Graph the scraped CSV files are attached to.
GRAPH_ID = os.getenv("GRAPH_ID", "")
# Base URL of the Hacker News API. Falls back to the public Firebase endpoint
# when the variable is unset or empty, because an empty base would turn every
# request URL into a schemeless path such as "/topstories.json". A trailing
# slash is dropped since callers append "/<path>" themselves.
HACKERNEWS_API_URL = (
    os.getenv("HACKERNEWS_API_URL") or "https://hacker-news.firebaseio.com/v0"
).rstrip("/")
WRITER_API_KEY = os.getenv("WRITER_API_KEY", "")

wf.Config.feature_flags = ["dataframeEditor"]
|
31 |
+
|
32 |
+
|
33 |
+
def _select_columns(frame: Any, columns: List[str]) -> pd.DataFrame:
    """Return ``frame`` restricted to ``columns``; an empty frame with those columns if there is no data."""
    # A missing or empty frame (e.g. after a failed scrape) may not carry the
    # expected columns at all, so indexing it would raise KeyError.
    if frame is None or getattr(frame, "empty", True):
        return pd.DataFrame(columns=columns)
    return frame[columns]


def main(state: WriterState) -> None:
    """Run the full fetch pipeline: scrape, display, save to CSV and upload to the graph.

    Steps, in order:
      1. remove the files currently attached to the Knowledge Graph ``GRAPH_ID``;
      2. scrape posts (and comments) from Hacker News;
      3. store the display columns in ``state["posts"]`` / ``state["comments"]``;
      4. write them to CSV under ``static/`` and upload those files to the graph.

    Progress is reported through ``state["message_setup"]``.
    NOTE(review): the leading "%" / "+" characters look like style prefixes of
    the message component - confirm against the Writer Framework docs.
    """
    try:
        _delete_files_from_graph(GRAPH_ID)
        state["message_setup"] = "%Scraping data"
        state["message_setup_vis"] = True

        posts, comments = _scrape_hackernews(state)
        state["message_setup"] = "%Data was scraped"
        state["posts"] = _select_columns(
            posts, ["title", "created_utc", "score", "num_comments", "url"]
        )
        if state["allow_comments"]:
            state["comments"] = _select_columns(
                comments, ["body", "author", "created_utc"]
            )
        state["message_setup"] = "%Scraped data, now saving to csv"

        _save_results_to_csv(state)
        state["message_setup"] = "%Saved data, now uploading to KG"

        posts_path = "static/hackernews_posts.csv"
        _upload_file_and_add_to_graph(posts_path, GRAPH_ID)
        state["message_setup"] = "%Uploaded file and added to graph"

        if state["allow_comments"]:
            comments_path = "static/hackernews_comments.csv"
            _upload_file_and_add_to_graph(comments_path, GRAPH_ID)

        state["message_setup"] = "+Scraping is completed!"
    finally:
        # Hide the progress message on every exit path so a failure does not
        # leave the loading indicator on screen indefinitely.
        state["message_setup_vis"] = False
|
58 |
+
|
59 |
+
|
60 |
+
def _delete_files_from_graph(graph_id: str) -> None:
    """Remove every file listed for ``graph_id`` from that graph.

    Best-effort: any exception is printed and swallowed, so the caller
    continues even when the graph could not be cleaned.
    """
    try:
        target_graph = retrieve_graph(graph_id=graph_id)
        attached_files = list_files(config={"extra_query": {"graph_id": graph_id}})

        if not attached_files:
            print("No files found in the specified graph.")

        for attached_file in attached_files:
            target_graph.remove_file(attached_file.id)

    except Exception as e:
        print(f"An error while file deletion occurred: {e!s}")
|
73 |
+
|
74 |
+
|
75 |
+
def _get_file_from_graph(file_id: str) -> Optional[File]:
    """Look up a file by id via ``retrieve_file``.

    Returns the file, or ``None`` (after printing the error) when the lookup
    raises.
    """
    try:
        found = retrieve_file(file_id=file_id)
    except Exception as e:
        print(f"An error while file obtainment occurred: {e!s}")
        found = None
    return found
|
81 |
+
|
82 |
+
|
83 |
+
def _scrape_hackernews(state: WriterState) -> tuple[Any, Any]:
    """Fetch top stories (and, if enabled, their comments) into ``state``.

    ``state["posts"]`` is replaced with a DataFrame sorted by score and
    comment count (descending) when at least one post was fetched; otherwise
    it keeps its previous value.

    Comments are only downloaded when ``state["allow_comments"]`` is truthy,
    which avoids one HTTP request per comment when the user switched them
    off. In that case ``state["comments"]`` is reset to an empty DataFrame so
    comments from an earlier fetch are not mixed with the new posts.

    Returns:
        The ``(state["posts"], state["comments"])`` pair after the update.
    """
    stories_ids = _get_stories_ids(state)
    posts, comments_ids = _get_posts(stories_ids)

    if len(posts) > 0:
        state["posts"] = pd.DataFrame(posts).sort_values(
            by=["score", "num_comments"], ascending=False
        )

    if state["allow_comments"]:
        comments = _get_comments(comments_ids)
        if len(comments) > 0:
            state["comments"] = pd.DataFrame(comments)
    else:
        state["comments"] = pd.DataFrame()

    return state["posts"], state["comments"]
|
97 |
+
|
98 |
+
|
99 |
+
def _get_stories_ids(state: WriterState) -> List[str]:
    """Fetch the top-story ids and keep the first ``state["fetch_limit"]`` of them.

    Returns an empty list (after printing a message) on a non-200 response or
    on any exception.
    """
    endpoint = f"{HACKERNEWS_API_URL}/topstories.json"
    try:
        reply = requests.get(url=endpoint, timeout=5)

        if reply.status_code == 200:
            all_ids = reply.json()
            return all_ids[: int(state["fetch_limit"])]

        print("Failed to fetch data from Hacker News")
        return []
    except Exception as e:
        print(f"Failed to fetch story ids from Hacker News: {e!s}")
        return []
|
112 |
+
|
113 |
+
|
114 |
+
def _get_posts(stories_ids: List[str]) -> tuple[List[dict], List[int]]:
    """Download the given stories and flatten them into post records.

    Args:
        stories_ids: Hacker News item ids of the stories to fetch.

    Returns:
        ``(posts, comment_ids)`` where ``posts`` is a list of dicts with the
        keys ``post_id``, ``title``, ``author``, ``score``, ``created_utc``,
        ``num_comments`` and ``url``, and ``comment_ids`` collects the
        ``kids`` of every story. ``([], [])`` is returned (after printing the
        error) if anything raises.
    """
    # Function-scope import: the module only imports ``datetime`` itself.
    from datetime import timezone

    if not stories_ids:
        return ([], [])

    try:
        stories_urls = [
            f"{HACKERNEWS_API_URL}/item/{story_id}.json" for story_id in stories_ids
        ]
        stories = asyncio.run(_perform_calls(stories_urls))

        comments_ids: List[int] = []
        posts_data: List[dict] = []

        for story in stories:
            # The API answers ``null`` for items it cannot return; skip those
            # instead of letting one bad item discard the whole batch.
            if not isinstance(story, dict):
                continue

            # Convert explicitly in UTC: a bare ``fromtimestamp`` yields local
            # time, which would contradict the ``created_utc`` column name.
            timestamp = story.get("time")
            created_utc = (
                datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime(
                    "%Y-%m-%d %H:%M:%S"
                )
                if timestamp is not None
                else ""
            )

            posts_data.append(
                {
                    "post_id": str(story.get("id", "")),
                    "title": story.get("title", ""),
                    "author": story.get("by", ""),
                    "score": int(story.get("score") or 0),
                    "created_utc": created_utc,
                    "num_comments": int(story.get("descendants") or 0),
                    "url": story.get("url", ""),
                }
            )

            comments_ids.extend(story.get("kids") or [])

        return posts_data, comments_ids
    except Exception as e:
        print(f"Failed to fetch stories from Hacker News: {str(e)}")
        return ([], [])
|
147 |
+
|
148 |
+
|
149 |
+
def _get_comments(comments_ids: List[str]) -> List[dict]:
    """Download the given comments and flatten them into comment records.

    Args:
        comments_ids: Hacker News item ids of the comments to fetch.

    Returns:
        A list of dicts with the keys ``comment_id``, ``post_id`` (the item's
        ``parent``), ``author``, ``created_utc`` and ``body``. An empty list
        is returned (after printing the error) if anything raises.
    """
    # Function-scope import: the module only imports ``datetime`` itself.
    from datetime import timezone

    if not comments_ids:
        return []

    try:
        comments_urls = [
            f"{HACKERNEWS_API_URL}/item/{comment_id}.json"
            for comment_id in comments_ids
        ]
        comments_data: List[dict] = []

        comments = asyncio.run(_perform_calls(comments_urls))

        for comment in comments:
            # The API answers ``null`` for items it cannot return; skip those
            # instead of letting one bad item discard the whole batch.
            if not isinstance(comment, dict):
                continue

            # Convert explicitly in UTC: a bare ``fromtimestamp`` yields local
            # time, which would contradict the ``created_utc`` column name.
            timestamp = comment.get("time")
            created_utc = (
                datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime(
                    "%Y-%m-%d %H:%M:%S"
                )
                if timestamp is not None
                else ""
            )

            comments_data.append(
                {
                    "comment_id": str(comment.get("id", "")),
                    "post_id": str(comment.get("parent", "")),
                    "author": comment.get("by", "anonymous"),
                    "created_utc": created_utc,
                    "body": comment.get("text", ""),
                }
            )

        return comments_data
    except Exception as e:
        print(f"Failed to fetch comments from Hacker News: {str(e)}")
        return []
|
176 |
+
|
177 |
+
|
178 |
+
async def _fetch_data(session: ClientSession, url: str) -> Any:
    """GET ``url`` on the shared ``session`` and return the decoded JSON body.

    The result is whatever the JSON document decodes to (not a ``str``), so
    the return type is ``Any``.
    """
    async with session.get(url) as response:
        return await response.json()
|
181 |
+
|
182 |
+
|
183 |
+
async def _perform_calls(urls: List[str]) -> List[dict]:
    """Fetch all ``urls`` concurrently and return the decoded responses.

    Requests that raise are dropped from the result (and counted in a printed
    message) instead of aborting the whole batch, so one failing item no
    longer costs every other response. The order of the remaining results
    follows the order of ``urls``.

    Args:
        urls: Fully-qualified URLs to request.

    Returns:
        The decoded JSON payload of every request that succeeded; an empty
        list when ``urls`` is empty.
    """
    if not urls:
        # Nothing to fetch: avoid opening a client session at all.
        return []

    async with aiohttp.ClientSession() as session:
        tasks = [_fetch_data(session, url) for url in urls]
        # return_exceptions=True makes gather hand back the exception object
        # in place of a result rather than raising on the first failure.
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)

    results = [
        outcome for outcome in outcomes if not isinstance(outcome, BaseException)
    ]
    failed = len(outcomes) - len(results)
    if failed:
        print(f"{failed} of {len(outcomes)} Hacker News requests failed and were skipped")

    return results
|
189 |
+
|
190 |
+
|
191 |
+
def _save_results_to_csv(state: WriterState) -> None:
    """Write ``state["posts"]`` and, if comments are enabled, ``state["comments"]`` to CSV files under ``static/``."""
    posts_frame = state["posts"]
    posts_frame.to_csv("static/hackernews_posts.csv", index=False)

    if not state["allow_comments"]:
        return

    comments_frame = state["comments"]
    comments_frame.to_csv("static/hackernews_comments.csv", index=False)
|
195 |
+
|
196 |
+
|
197 |
+
def _upload_file_and_add_to_graph(file_path: str, graph_id: str) -> dict:
    """Upload ``file_path`` and attach the uploaded file to ``graph_id``.

    Returns ``{"file_id": ..., "graph_id": ...}`` on success, or an empty
    dict (after printing the error) when either step raises.
    """
    try:
        new_file_id = _upload_file(file_path)
        _add_file_to_graph(graph_id, new_file_id)
    except Exception as e:
        print(f"An error while file uploading occurred: {e!s}")
        return {}

    return {"file_id": new_file_id, "graph_id": graph_id}
|
206 |
+
|
207 |
+
|
208 |
+
def _upload_file(file_path: str) -> str:
    """Upload the file at ``file_path`` as ``text/csv`` and return the uploaded file's id.

    The upload is named after the file's stem (file name without extension).
    """
    with open(file_path, "rb") as handle:
        payload = handle.read()
        upload_name = Path(handle.name).stem
        uploaded = upload_file(data=payload, name=upload_name, type="text/csv")
    return uploaded.id
|
214 |
+
|
215 |
+
|
216 |
+
def _add_file_to_graph(graph_id: str, file_id: str) -> None:
    """Retrieve the graph ``graph_id`` and add the file ``file_id`` to it."""
    retrieve_graph(graph_id).add_file(file_id)
|
219 |
+
|
220 |
+
|
221 |
+
def _handle_contributing_sources(state: WriterState, graph_data: dict) -> None:
    """Rebuild the "contributed_sources" column from the sources in ``graph_data``.

    For every entry in ``graph_data["sources"]`` a section titled with the
    source file's name is created, containing the source snippet as text.
    Afterwards the column is made visible and the toggle text is updated.
    Does nothing when ``graph_data`` has no sources.

    Args:
        state: Application state; ``contributing_sources_vis`` and
            ``contributing_sources_button_text`` are written.
        graph_data: Mapping with an optional ``"sources"`` list whose items
            provide ``"file_id"`` and ``"snippet"``.
    """
    sources = graph_data.get("sources")
    if not sources:
        return

    with wf.init_ui() as ui:
        with ui.refresh_with("contributed_sources"):
            for index, source in enumerate(sources):
                source_file = _get_file_from_graph(source["file_id"])
                source_snippet = source["snippet"]
                # The lookup returns None when the file cannot be retrieved;
                # fall back to a placeholder title instead of crashing on
                # ``None.name``.
                file_label = (
                    source_file.name if source_file is not None else "Unknown source"
                )
                section_id = f"source {index}"
                ui.Section(
                    content={
                        "title": "📄 " + file_label,
                        "cssClasses": "file",
                    },
                    id=section_id,
                )
                with ui.find(section_id):
                    ui.Text({"text": source_snippet, "cssClasses": "file-text"})

    state["contributing_sources_vis"] = True
    state["contributing_sources_button_text"] = "View contributing sources ▸"
|
241 |
+
|
242 |
+
|
243 |
+
def run_report(state: WriterState) -> None:
    """Generate the trend report from the scraped posts and comments.

    Builds the prompt with ``report_prompt``, streams the completion and
    appends each chunk's ``content`` to ``state["prepared_report"]`` as it
    arrives.

    Raises:
        Exception: whatever the prompt building or completion raised, after
            ``state["prepared_report"]`` was replaced by a generic error text.
    """
    try:
        state["message_report"] = "%Creating report"
        state["message_report_vis"] = True

        prompt = report_prompt(state["posts"], state["comments"])
        report_convo = Conversation()
        report_convo += {"role": "user", "content": prompt}
        response = report_convo.stream_complete()

        state["prepared_report"] = ""

        for chunk in response:
            state["prepared_report"] += chunk["content"]

        state["message_report"] = "+Creation is finished!"
    except Exception:
        state["prepared_report"] = "Something went wrong. Please try again!"
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # Hide the progress message on failure as well, so it does not stay
        # on screen next to the error text.
        state["message_report_vis"] = False
|
263 |
+
|
264 |
+
|
265 |
+
def fetch_posts(state: WriterState) -> None:
    """Click handler of the "Fetch posts" button; delegates to ``main``."""
    main(state=state)
|
267 |
+
|
268 |
+
|
269 |
+
def message_handler(payload: dict, state: WriterState) -> None:
    """Handle a chatbot message: answer it using the Knowledge Graph as a tool.

    The incoming ``payload`` is appended to ``state["conversation"]``, the
    completion is streamed into the conversation chunk by chunk, and if the
    final message carries ``graph_data`` the contributing sources are shown.

    Raises:
        Exception: whatever the completion raised, after a generic apology
            message was appended to the conversation.
    """
    try:
        # NOTE(review): the custom.js shown with this change only exports
        # initSelectedDropdownOption; confirm that enableDisableTextarea
        # exists in the "scripts" frontend module.
        state.call_frontend_function("scripts", "enableDisableTextarea", ["true"])
        state["conversation"] += payload

        graph = retrieve_graph(GRAPH_ID)

        response = state["conversation"].stream_complete(tools=graph)

        for chunk in response:
            state["conversation"] += chunk

        graph_data = state["conversation"].messages[-1].get("graph_data")
        if graph_data:
            _handle_contributing_sources(state, graph_data)
    except Exception:
        state["conversation"] += {
            "role": "assistant",
            "content": "Something went wrong. Please try again!",
        }
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # Single exit point for the matching "false" call, so it is issued on
        # success and on failure alike.
        state.call_frontend_function("scripts", "enableDisableTextarea", ["false"])
|
293 |
+
|
294 |
+
|
295 |
+
def contributing_sources_change_vis(state: WriterState) -> None:
    """Toggle the contributing-sources column and update the toggle text to match."""
    now_visible = not state["contributing_sources_vis"]
    state["contributing_sources_vis"] = now_visible
    state["contributing_sources_button_text"] = (
        "View contributing sources ▸" if now_visible else "View contributing sources ◂"
    )
|
301 |
+
|
302 |
+
|
303 |
+
# Initial application state. Keys are bound from the UI components
# (messages, slider, switch, dataframes) and written by the handlers above.
initial_state = wf.init_state(
    dict(
        conversation=Conversation(
            [
                {"role": "assistant", "content": "Ask me anything about the scraped Hacker News data."},
            ],
        ),
        response=None,
        file_path="",
        graph_name="",
        uploaded_file=None,
        graph_id=None,
        prepared_report="# **Trend report**",
        contributing_sources_button_text="View contributing sources ◂",
        message_setup="",
        message_report="",
        message_setup_vis=False,
        message_report_vis=False,
        contributing_sources_vis=False,
        fetch_limit=100,
        allow_comments=True,
    )
)

# Register the custom frontend script and stylesheet served from /static.
initial_state.import_frontend_module("scripts", "/static/custom.js")
initial_state.import_stylesheet("style", "/static/custom.css")
# NOTE(review): initSelectedDropdownOption looks up elements with the class
# "subreddits"; confirm this UI still contains such an element.
initial_state.call_frontend_function("scripts", "initSelectedDropdownOption", [])
|
hacker-news-social-listener/poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
hacker-news-social-listener/prompts.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def report_prompt(posts, comments):
    """Build the report-generation prompt around the given posts and comments.

    Both arguments are interpolated with their default string formatting.
    """
    template = """
# CONTEXT #
You are an expert at analyzing large amounts of posts and comments
at social network for software developers called HackerNews. You are creating
summary reports of provided data. Furthermore, you are acting
really carefully outlining main trends, top posts and comments,
most famous topics and development approaches.

# INSTRUCTIONS #
Create an expertly written summary report.

# DATA #
Here are the posts and comments you will use to create the report.

Posts:
{posts}

Comments:
{comments}

# ADDITIONAL GUIDELINES #
- Reflect only top posts and comments. DO NOT reflect all data in your
report.
- FIT your report in 10-15 paragraphs. This is also very IMPORTANT.
- Say a few words about posts reflected at report.
- Provide some analysis of trends you are surveying: if users consider
theme useful or not, if they are pleased with it and so on.
- Outline most interesting, discussed and high rated comments and posts.

# RESPONSE FORMAT #

Highlight headers, topics, main ideas. Use .md markup to style your text.
"""
    return template.format(posts=posts, comments=comments)
|
hacker-news-social-listener/static/README.md
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Serving static files
|
2 |
+
|
3 |
+
You can use this folder to store files which will be served statically in the "/static" route.
|
4 |
+
|
5 |
+
This is useful to store images and other files which will be served directly to the user of your application.
|
6 |
+
|
7 |
+
For example, if you store an image named "myimage.jpg" in this folder, it'll be accessible as "static/myimage.jpg".
|
8 |
+
You can use this relative route as the source in an Image component.
|
hacker-news-social-listener/static/custom.css
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.file{
|
2 |
+
height: 60vh !important;
|
3 |
+
}
|
4 |
+
|
5 |
+
.file-text{
|
6 |
+
height: 51vh !important;
|
7 |
+
overflow-y: auto !important;
|
8 |
+
}
|
9 |
+
|
10 |
+
.files-list{
|
11 |
+
height: 84vh !important;
|
12 |
+
overflow-y: auto !important;
|
13 |
+
}
|
14 |
+
|
15 |
+
.CoreSection__title h3{
|
16 |
+
font-size: 1.05rem !important;
|
17 |
+
}
|
hacker-news-social-listener/static/custom.js
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/**
 * Marks the first <option> inside the first element with the class
 * "subreddits" as selected.
 *
 * Does nothing when no such element or option exists, instead of throwing a
 * TypeError on an undefined lookup result.
 */
export function initSelectedDropdownOption() {
	const container = document.getElementsByClassName("subreddits")[0];
	if (!container) {
		return;
	}

	const firstOption = container.getElementsByTagName("option")[0];
	if (!firstOption) {
		return;
	}

	firstOption.selected = true;
}
|
hacker-news-social-listener/static/favicon.png
ADDED
pyproject.toml
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "writer-framework-default"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = ""
|
5 |
+
authors = ["Your Name <[email protected]>"]
|
6 |
+
readme = "README.md"
|
7 |
+
|
8 |
+
[tool.poetry.dependencies]
|
9 |
+
python = "^3.10.0"
|
10 |
+
writer = "^0.8.2"
|
11 |
+
praw = "^7.7.1"
|
12 |
+
black = "^24.8.0"
|
13 |
+
flake8 = "^7.1.1"
|
14 |
+
isort = "^5.13.2"
|
15 |
+
pre-commit = "^3.8.0"
|
16 |
+
python-dotenv = "^1.0.1"
|
17 |
+
aiohttp = "^3.10.10"
|
18 |
+
asyncio = "^3.4.3"
|
19 |
+
|
20 |
+
|
21 |
+
[build-system]
|
22 |
+
requires = ["poetry-core"]
|
23 |
+
build-backend = "poetry.core.masonry.api"
|