Spaces:
Sleeping
Sleeping
Change Database port to work on HuggingFace Spaces properly
Browse files- config.py +11 -3
- database.py +6 -1
config.py
CHANGED
@@ -6,11 +6,19 @@ qdrant_api_key = os.getenv('QDRANT_API_KEY')
|
|
6 |
description = """This is a Kanji image search demo. Draw or upload an image of an individual Kanji character."""
|
7 |
|
8 |
article = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
### About this project
|
10 |
|
11 |
-
|
12 |
|
13 |
-
|
14 |
|
15 |
-
The
|
16 |
"""
|
|
|
6 |
description = """This is a Kanji image search demo. Draw or upload an image of an individual Kanji character."""
|
7 |
|
8 |
article = """
|
9 |
+
### Getting better results
|
10 |
+
|
11 |
+
Try different brush sizes.
|
12 |
+
Try to draw it centered in the middle of the canvas, both horizontally and vertically.
|
13 |
+
You may want to try using an external tool to draw then import a file.
|
14 |
+
|
15 |
+
The results are sorted by estimated distance from the input, but will rarely give the exact Kanji you are searching for as the first result.
|
16 |
+
|
17 |
### About this project
|
18 |
|
19 |
+
It uses the "kha-white/manga-ocr-base" Vision Transformer Encoder model to create embeddings, then uses a vector database (qdrant) to find similar characters.
|
20 |
|
21 |
+
You can find the code used to create the embeddings, as well as more information, at https://github.com/etrotta/kanji_lookup
|
22 |
|
23 |
+
The database has been populated with over 10000 characters from [The KANJIDIC project](https://www.edrdg.org/wiki/index.php/KANJIDIC_Project), each rendered in multiple fonts downloaded from Google Fonts
|
24 |
"""
|
database.py
CHANGED
@@ -4,7 +4,12 @@ from qdrant_client import QdrantClient, models
|
|
4 |
|
5 |
from config import qdrant_location, qdrant_api_key
|
6 |
|
7 |
-
qdrant = QdrantClient(
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
def search_vector(query_vector: torch.Tensor, limit: int=20) -> list[models.ScoredPoint]:
|
10 |
hits = qdrant.search(
|
|
|
4 |
|
5 |
from config import qdrant_location, qdrant_api_key
|
6 |
|
7 |
+
qdrant = QdrantClient(
|
8 |
+
qdrant_location,
|
9 |
+
api_key=qdrant_api_key,
|
10 |
+
port=443,
|
11 |
+
timeout=30,
|
12 |
+
)
|
13 |
|
14 |
def search_vector(query_vector: torch.Tensor, limit: int=20) -> list[models.ScoredPoint]:
|
15 |
hits = qdrant.search(
|