File size: 3,442 Bytes
01b8e8e
 
 
39503cb
4107940
39503cb
 
 
1b47089
39503cb
 
01b8e8e
 
 
c397816
01b8e8e
 
 
 
0a35ae0
 
 
57f7a2e
f670f93
c397816
9ff8b5f
9d9f8c0
7786dc7
57f7a2e
213d365
fae3074
9ff8b5f
fae3074
 
9f4d760
 
 
 
39503cb
 
01b8e8e
 
 
39503cb
01b8e8e
 
39503cb
5634055
39503cb
01b8e8e
acb72cc
 
 
 
 
 
01b8e8e
39503cb
01b8e8e
39503cb
 
01b8e8e
 
 
39503cb
5634055
39503cb
01b8e8e
 
0a35ae0
 
01b8e8e
 
0a35ae0
01b8e8e
 
 
 
 
 
39503cb
6a6afbf
 
 
843bc9e
39503cb
01b8e8e
 
 
acb72cc
 
 
 
 
 
 
b8b4666
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import streamlit as st
from streamlit_option_menu import option_menu
from core.search_index import index, search
from interface.components import (
    component_file_input,
    component_show_pipeline,
    component_show_search_result,
    component_text_input,
    component_article_url,
)


def page_landing_page(container):
    """Render the static landing page inside ``container``.

    Writes a header followed by several markdown paragraphs (tool
    description, feature list, TODO list and project links) using Streamlit.

    Args:
        container: Object used as a context manager (``with container:``)
            so that the Streamlit calls below are issued within it.
    """
    with container:
        st.header("Neural Search V2.1")

        st.markdown(
            "This is a tool to allow indexing & search content using neural capabilities"
        )
        st.markdown(
            "It uses the [Haystack](https://haystack.deepset.ai/overview/intro) open-source framework for building search systems"
        )
        st.markdown(
            "In this second version you can:"
            "\n  - Index raw text, URLs, CSVs, PDFs, Images and even audio!"
            "\n  - Use Dense Passage Retrieval, Keyword Search pipeline and DPR Ranker pipelines"
            "\n  - Search the indexed documents"
            "\n  - Read your responses out loud using the `audio_output` option!"
        )
        st.markdown(
            "TODO list:"
            "\n  - File type classification and converter nodes"
            "\n  - Build other pipelines"
        )
        st.markdown(
            "Follow development of the tool [here](https://github.com/ugm2/neural-search-demo)"
            # \U0001F49A is the green-heart emoji; written as an escape so the
            # source stays ASCII and cannot be mis-decoded into mojibake again.
            "\n\nDeveloped with \U0001F49A by [@ugm2](https://github.com/ugm2)"
        )


def page_search(container):
    """Render the search page inside ``container``.

    Shows a query box and the search pipeline, runs ``search`` when the
    "Search" button is pressed, and displays the first entry of the stored
    results whenever ``st.session_state["search_results"]`` is not ``None``.

    Args:
        container: Object used as a context manager for the page contents;
            it is also forwarded to ``component_show_search_result``.

    Note:
        ``st.session_state["pipeline"]`` and
        ``st.session_state["search_results"]`` are read by key here, so they
        are presumably initialised by the caller before this page is drawn.
    """
    state = st.session_state

    with container:
        st.title("Query me!")

        # Free-text query typed by the user.
        user_query = st.text_input("Query")

        component_show_pipeline(state["pipeline"], "search_pipeline")

        search_requested = st.button("Search")
        if search_requested:
            with st.spinner("Searching..."):
                # A single query is sent as a one-element batch.
                state["search_results"] = search(
                    queries=[user_query],
                    pipeline=state["pipeline"]["search_pipeline"],
                )

        stored_results = state["search_results"]
        if stored_results is None:
            return

        # Only one query was submitted, so only the first entry is shown.
        component_show_search_result(container=container, results=stored_results[0])


def page_index(container):
    """Render the indexing page inside ``container``.

    Shows the index pipeline, lets the user pick an input source (raw text,
    URL or file) from a horizontal menu, collects a corpus through the
    matching input component and, when the "Index" button is pressed, passes
    that corpus to ``index``.

    Args:
        container: Object used as a context manager for the page contents;
            it is also forwarded to the selected input component.

    Note:
        ``st.session_state["doc_id"]`` is only overwritten after ``index``
        has returned, i.e. when the user actually indexes the corpus.
    """
    with container:
        st.title("Index time!")

        component_show_pipeline(st.session_state["pipeline"], "index_pipeline")

        # (menu label, input component, menu icon name) for each source.
        sources = (
            ("Raw Text", component_text_input, "card-text"),
            ("URL", component_article_url, "link"),
            ("File", component_file_input, "file-text"),
        )
        labels = [label for label, _, _ in sources]
        icon_names = [icon for _, _, icon in sources]
        readers = {label: reader for label, reader, _ in sources}

        chosen_label = option_menu(
            None,
            labels,
            icons=icon_names,
            menu_icon="list",
            default_index=0,
            orientation="horizontal",
        )

        clear_index = st.sidebar.checkbox("Clear Index", True)

        current_doc_id = st.session_state["doc_id"]
        read_input = readers[chosen_label]
        corpus, next_doc_id = read_input(container, current_doc_id)

        # Nothing to index yet: do not show the "Index" button.
        if len(corpus) == 0:
            return

        if not st.button("Index"):
            return

        with st.spinner("Indexing..."):
            indexed = index(
                documents=corpus,
                pipeline=st.session_state["pipeline"]["index_pipeline"],
                clear_index=clear_index,
            )
            # Persist the id returned by the input component once indexing ran.
            st.session_state["doc_id"] = next_doc_id
        st.success(f"{len(indexed)} documents indexed successfully!")