|
|
|
import os |
|
from typing import Optional, Type |
|
|
|
from asyncer import asyncify |
|
|
|
from lagent.actions.base_action import AsyncActionMixin, BaseAction, tool_api |
|
from lagent.schema import ActionReturn, ActionStatusCode |
|
from .parser import BaseParser, JsonParser |
|
|
|
|
|
class GoogleScholar(BaseAction): |
|
"""Plugin for google scholar search. |
|
|
|
Args: |
|
api_key (str): API KEY to use serper google search API, |
|
You can create a free API key at https://serper.dev. |
|
description (dict): The description of the action. Defaults to ``None``. |
|
parser (Type[BaseParser]): The parser class to process the |
|
action's inputs and outputs. Defaults to :class:`JsonParser`. |
|
""" |
|
|
|
def __init__( |
|
self, |
|
api_key: Optional[str] = None, |
|
description: Optional[dict] = None, |
|
parser: Type[BaseParser] = JsonParser, |
|
): |
|
super().__init__(description, parser) |
|
api_key = os.environ.get('SERPER_API_KEY', api_key) |
|
if api_key is None: |
|
raise ValueError( |
|
'Please set Serper API key either in the environment ' |
|
'as SERPER_API_KEY or pass it as `api_key` parameter.' |
|
) |
|
self.api_key = api_key |
|
|
|
@tool_api(explode_return=True) |
|
def search_google_scholar( |
|
self, |
|
query: str, |
|
cites: Optional[str] = None, |
|
as_ylo: Optional[int] = None, |
|
as_yhi: Optional[int] = None, |
|
scisbd: Optional[int] = None, |
|
cluster: Optional[str] = None, |
|
hl: Optional[str] = None, |
|
lr: Optional[str] = None, |
|
start: Optional[int] = None, |
|
num: Optional[int] = None, |
|
as_sdt: Optional[str] = None, |
|
safe: Optional[str] = None, |
|
filter: Optional[str] = None, |
|
as_vis: Optional[str] = None, |
|
) -> dict: |
|
"""Search for scholarly articles based on a query according to the google scholar. |
|
|
|
Args: |
|
query (str): The query to search for. |
|
cites (Optional[str]): The unique ID of an article for triggering "Cited By" searches. |
|
as_ylo (Optional[int]): The starting year for results (e.g., if as_ylo=2018, results before this year will be omitted). |
|
as_yhi (Optional[int]): The ending year for results (e.g., if as_yhi=2018, results after this year will be omitted). |
|
scisbd (Optional[int]): Defines articles added in the last year, sorted by date. It can be set to 1 to include only abstracts, or 2 to include everything. |
|
cluster (Optional[str]): The unique ID of an article for triggering "All Versions" searches. |
|
hl (Optional[str]): The language to use for the Google Scholar search. |
|
lr (Optional[str]): One or multiple languages to limit the search to. |
|
start (Optional[int]): The result offset for pagination (0 is the first page of results, 10 is the 2nd page, etc.) |
|
num (Optional[int]): The maximum number of results to return, limited to 20. |
|
as_sdt (Optional[str]): Can be used either as a search type or a filter. |
|
safe (Optional[str]): The level of filtering for adult content. |
|
filter (Optional[str]): Defines if the filters for 'Similar Results' and 'Omitted Results' are on or off. |
|
as_vis (Optional[str]): Defines whether to include citations or not. |
|
|
|
Returns: |
|
:class:`dict`: article information |
|
- title: a list of the titles of the three selected papers |
|
- cited_by: a list of the citation numbers of the three selected papers |
|
- organic_id: a list of the organic results' ids of the three selected papers |
|
- pub_info: publication information of selected papers |
|
""" |
|
from serpapi import GoogleSearch |
|
|
|
params = { |
|
'q': query, |
|
'engine': 'google_scholar', |
|
'api_key': self.api_key, |
|
'cites': cites, |
|
'as_ylo': as_ylo, |
|
'as_yhi': as_yhi, |
|
'scisbd': scisbd, |
|
'cluster': cluster, |
|
'hl': hl, |
|
'lr': lr, |
|
'start': start, |
|
'num': num, |
|
'as_sdt': as_sdt, |
|
'safe': safe, |
|
'filter': filter, |
|
'as_vis': as_vis, |
|
} |
|
search = GoogleSearch(params) |
|
try: |
|
r = search.get_dict() |
|
results = r['organic_results'] |
|
title = [] |
|
snippets = [] |
|
cited_by = [] |
|
organic_id = [] |
|
pub_info = [] |
|
for item in results[:3]: |
|
title.append(item['title']) |
|
pub_info.append(item['publication_info']['summary']) |
|
citation = item['inline_links'].get('cited_by', {'total': ''}) |
|
cited_by.append(citation['total']) |
|
snippets.append(item['snippet']) |
|
organic_id.append(item['result_id']) |
|
return dict(title=title, cited_by=cited_by, organic_id=organic_id, snippets=snippets) |
|
except Exception as e: |
|
return ActionReturn(errmsg=str(e), state=ActionStatusCode.HTTP_ERROR) |
|
|
|
@tool_api(explode_return=True) |
|
def get_author_information( |
|
self, |
|
author_id: str, |
|
hl: Optional[str] = None, |
|
view_op: Optional[str] = None, |
|
sort: Optional[str] = None, |
|
citation_id: Optional[str] = None, |
|
start: Optional[int] = None, |
|
num: Optional[int] = None, |
|
no_cache: Optional[bool] = None, |
|
async_req: Optional[bool] = None, |
|
output: Optional[str] = None, |
|
) -> dict: |
|
"""Search for an author's information by author's id provided by get_author_id. |
|
|
|
Args: |
|
author_id (str): Required. The ID of an author. |
|
hl (Optional[str]): The language to use for the Google Scholar Author search. Default is 'en'. |
|
view_op (Optional[str]): Used for viewing specific parts of a page. |
|
sort (Optional[str]): Used for sorting and refining articles. |
|
citation_id (Optional[str]): Used for retrieving individual article citation. |
|
start (Optional[int]): Defines the result offset. Default is 0. |
|
num (Optional[int]): Defines the number of results to return. Default is 20. |
|
no_cache (Optional[bool]): Forces SerpApi to fetch the results even if a cached version is already present. Default is False. |
|
async_req (Optional[bool]): Defines the way you want to submit your search to SerpApi. Default is False. |
|
output (Optional[str]): Defines the final output you want. Default is 'json'. |
|
|
|
Returns: |
|
:class:`dict`: author information |
|
* name: author's name |
|
* affliation: the affliation of the author |
|
* articles: at most 3 articles by the author |
|
* website: the author's homepage url |
|
""" |
|
from serpapi import GoogleSearch |
|
|
|
params = { |
|
'engine': 'google_scholar_author', |
|
'author_id': author_id, |
|
'api_key': self.api_key, |
|
'hl': hl, |
|
'view_op': view_op, |
|
'sort': sort, |
|
'citation_id': citation_id, |
|
'start': start, |
|
'num': num, |
|
'no_cache': no_cache, |
|
'async': async_req, |
|
'output': output, |
|
} |
|
try: |
|
search = GoogleSearch(params) |
|
results = search.get_dict() |
|
author = results['author'] |
|
articles = results.get('articles', []) |
|
return dict( |
|
name=author['name'], |
|
affiliations=author.get('affiliations', ''), |
|
website=author.get('website', ''), |
|
articles=[dict(title=article['title'], authors=article['authors']) for article in articles[:3]], |
|
) |
|
except Exception as e: |
|
return ActionReturn(errmsg=str(e), state=ActionStatusCode.HTTP_ERROR) |
|
|
|
@tool_api(explode_return=True) |
|
def get_citation_format( |
|
self, |
|
q: str, |
|
no_cache: Optional[bool] = None, |
|
async_: Optional[bool] = None, |
|
output: Optional[str] = 'json', |
|
) -> dict: |
|
"""Function to get MLA citation format by an identification of organic_result's id provided by search_google_scholar. |
|
|
|
Args: |
|
q (str): ID of an individual Google Scholar organic search result. |
|
no_cache (Optional[bool]): If set to True, will force SerpApi to fetch the Google Scholar Cite results even if a cached version is already present. Defaults to None. |
|
async_ (Optional[bool]): If set to True, will submit search to SerpApi and retrieve results later. Defaults to None. |
|
output (Optional[str]): Final output format. Set to 'json' to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'. |
|
|
|
Returns: |
|
:class:`dict`: citation format |
|
* authors: the authors of the article |
|
* citation: the citation format of the article |
|
""" |
|
from serpapi import GoogleSearch |
|
|
|
params = { |
|
'q': q, |
|
'engine': 'google_scholar_cite', |
|
'api_key': self.api_key, |
|
'no_cache': no_cache, |
|
'async': async_, |
|
'output': output, |
|
} |
|
try: |
|
search = GoogleSearch(params) |
|
results = search.get_dict() |
|
citation = results['citations'] |
|
citation_info = citation[0]['snippet'] |
|
return citation_info |
|
except Exception as e: |
|
return ActionReturn(errmsg=str(e), state=ActionStatusCode.HTTP_ERROR) |
|
|
|
@tool_api(explode_return=True) |
|
def get_author_id( |
|
self, |
|
mauthors: str, |
|
hl: Optional[str] = 'en', |
|
after_author: Optional[str] = None, |
|
before_author: Optional[str] = None, |
|
no_cache: Optional[bool] = False, |
|
_async: Optional[bool] = False, |
|
output: Optional[str] = 'json', |
|
) -> dict: |
|
"""The getAuthorId function is used to get the author's id by his or her name. |
|
|
|
Args: |
|
mauthors (str): Defines the author you want to search for. |
|
hl (Optional[str]): Defines the language to use for the Google Scholar Profiles search. It's a two-letter language code. (e.g., 'en' for English, 'es' for Spanish, or 'fr' for French). Defaults to 'en'. |
|
after_author (Optional[str]): Defines the next page token. It is used for retrieving the next page results. The parameter has the precedence over before_author parameter. Defaults to None. |
|
before_author (Optional[str]): Defines the previous page token. It is used for retrieving the previous page results. Defaults to None. |
|
no_cache (Optional[bool]): Will force SerpApi to fetch the Google Scholar Profiles results even if a cached version is already present. Defaults to False. |
|
_async (Optional[bool]): Defines the way you want to submit your search to SerpApi. Defaults to False. |
|
output (Optional[str]): Defines the final output you want. It can be set to 'json' (default) to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'. |
|
|
|
Returns: |
|
:class:`dict`: author id |
|
* author_id: the author_id of the author |
|
""" |
|
from serpapi import GoogleSearch |
|
|
|
params = { |
|
'mauthors': mauthors, |
|
'engine': 'google_scholar_profiles', |
|
'api_key': self.api_key, |
|
'hl': hl, |
|
'after_author': after_author, |
|
'before_author': before_author, |
|
'no_cache': no_cache, |
|
'async': _async, |
|
'output': output, |
|
} |
|
try: |
|
search = GoogleSearch(params) |
|
results = search.get_dict() |
|
profile = results['profiles'] |
|
author_info = dict(author_id=profile[0]['author_id']) |
|
return author_info |
|
except Exception as e: |
|
return ActionReturn(errmsg=str(e), state=ActionStatusCode.HTTP_ERROR) |
|
|
|
|
|
class AsyncGoogleScholar(AsyncActionMixin, GoogleScholar): |
|
"""Plugin for google scholar search. |
|
|
|
Args: |
|
api_key (str): API KEY to use serper google search API, |
|
You can create a free API key at https://serper.dev. |
|
description (dict): The description of the action. Defaults to ``None``. |
|
parser (Type[BaseParser]): The parser class to process the |
|
action's inputs and outputs. Defaults to :class:`JsonParser`. |
|
""" |
|
|
|
@tool_api(explode_return=True) |
|
@asyncify |
|
def search_google_scholar( |
|
self, |
|
query: str, |
|
cites: Optional[str] = None, |
|
as_ylo: Optional[int] = None, |
|
as_yhi: Optional[int] = None, |
|
scisbd: Optional[int] = None, |
|
cluster: Optional[str] = None, |
|
hl: Optional[str] = None, |
|
lr: Optional[str] = None, |
|
start: Optional[int] = None, |
|
num: Optional[int] = None, |
|
as_sdt: Optional[str] = None, |
|
safe: Optional[str] = None, |
|
filter: Optional[str] = None, |
|
as_vis: Optional[str] = None, |
|
) -> dict: |
|
"""Search for scholarly articles based on a query according to the google scholar. |
|
|
|
Args: |
|
query (str): The query to search for. |
|
cites (Optional[str]): The unique ID of an article for triggering "Cited By" searches. |
|
as_ylo (Optional[int]): The starting year for results (e.g., if as_ylo=2018, results before this year will be omitted). |
|
as_yhi (Optional[int]): The ending year for results (e.g., if as_yhi=2018, results after this year will be omitted). |
|
scisbd (Optional[int]): Defines articles added in the last year, sorted by date. It can be set to 1 to include only abstracts, or 2 to include everything. |
|
cluster (Optional[str]): The unique ID of an article for triggering "All Versions" searches. |
|
hl (Optional[str]): The language to use for the Google Scholar search. |
|
lr (Optional[str]): One or multiple languages to limit the search to. |
|
start (Optional[int]): The result offset for pagination (0 is the first page of results, 10 is the 2nd page, etc.) |
|
num (Optional[int]): The maximum number of results to return, limited to 20. |
|
as_sdt (Optional[str]): Can be used either as a search type or a filter. |
|
safe (Optional[str]): The level of filtering for adult content. |
|
filter (Optional[str]): Defines if the filters for 'Similar Results' and 'Omitted Results' are on or off. |
|
as_vis (Optional[str]): Defines whether to include citations or not. |
|
|
|
Returns: |
|
:class:`dict`: article information |
|
- title: a list of the titles of the three selected papers |
|
- cited_by: a list of the citation numbers of the three selected papers |
|
- organic_id: a list of the organic results' ids of the three selected papers |
|
- pub_info: publication information of selected papers |
|
""" |
|
return super().search_google_scholar( |
|
query, |
|
cites, |
|
as_ylo, |
|
as_yhi, |
|
scisbd, |
|
cluster, |
|
hl, |
|
lr, |
|
start, |
|
num, |
|
as_sdt, |
|
safe, |
|
filter, |
|
as_vis, |
|
) |
|
|
|
@tool_api(explode_return=True) |
|
@asyncify |
|
def get_author_information( |
|
self, |
|
author_id: str, |
|
hl: Optional[str] = None, |
|
view_op: Optional[str] = None, |
|
sort: Optional[str] = None, |
|
citation_id: Optional[str] = None, |
|
start: Optional[int] = None, |
|
num: Optional[int] = None, |
|
no_cache: Optional[bool] = None, |
|
async_req: Optional[bool] = None, |
|
output: Optional[str] = None, |
|
) -> dict: |
|
"""Search for an author's information by author's id provided by get_author_id. |
|
|
|
Args: |
|
author_id (str): Required. The ID of an author. |
|
hl (Optional[str]): The language to use for the Google Scholar Author search. Default is 'en'. |
|
view_op (Optional[str]): Used for viewing specific parts of a page. |
|
sort (Optional[str]): Used for sorting and refining articles. |
|
citation_id (Optional[str]): Used for retrieving individual article citation. |
|
start (Optional[int]): Defines the result offset. Default is 0. |
|
num (Optional[int]): Defines the number of results to return. Default is 20. |
|
no_cache (Optional[bool]): Forces SerpApi to fetch the results even if a cached version is already present. Default is False. |
|
async_req (Optional[bool]): Defines the way you want to submit your search to SerpApi. Default is False. |
|
output (Optional[str]): Defines the final output you want. Default is 'json'. |
|
|
|
Returns: |
|
:class:`dict`: author information |
|
* name: author's name |
|
* affliation: the affliation of the author |
|
* articles: at most 3 articles by the author |
|
* website: the author's homepage url |
|
""" |
|
return super().get_author_information( |
|
author_id, hl, view_op, sort, citation_id, start, num, no_cache, async_req, output |
|
) |
|
|
|
@tool_api(explode_return=True) |
|
@asyncify |
|
def get_citation_format( |
|
self, |
|
q: str, |
|
no_cache: Optional[bool] = None, |
|
async_: Optional[bool] = None, |
|
output: Optional[str] = 'json', |
|
) -> dict: |
|
"""Function to get MLA citation format by an identification of organic_result's id provided by search_google_scholar. |
|
|
|
Args: |
|
q (str): ID of an individual Google Scholar organic search result. |
|
no_cache (Optional[bool]): If set to True, will force SerpApi to fetch the Google Scholar Cite results even if a cached version is already present. Defaults to None. |
|
async_ (Optional[bool]): If set to True, will submit search to SerpApi and retrieve results later. Defaults to None. |
|
output (Optional[str]): Final output format. Set to 'json' to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'. |
|
|
|
Returns: |
|
:class:`dict`: citation format |
|
* authors: the authors of the article |
|
* citation: the citation format of the article |
|
""" |
|
return super().get_citation_format(q, no_cache, async_, output) |
|
|
|
@tool_api(explode_return=True) |
|
@asyncify |
|
def get_author_id( |
|
self, |
|
mauthors: str, |
|
hl: Optional[str] = 'en', |
|
after_author: Optional[str] = None, |
|
before_author: Optional[str] = None, |
|
no_cache: Optional[bool] = False, |
|
_async: Optional[bool] = False, |
|
output: Optional[str] = 'json', |
|
) -> dict: |
|
"""The getAuthorId function is used to get the author's id by his or her name. |
|
|
|
Args: |
|
mauthors (str): Defines the author you want to search for. |
|
hl (Optional[str]): Defines the language to use for the Google Scholar Profiles search. It's a two-letter language code. (e.g., 'en' for English, 'es' for Spanish, or 'fr' for French). Defaults to 'en'. |
|
after_author (Optional[str]): Defines the next page token. It is used for retrieving the next page results. The parameter has the precedence over before_author parameter. Defaults to None. |
|
before_author (Optional[str]): Defines the previous page token. It is used for retrieving the previous page results. Defaults to None. |
|
no_cache (Optional[bool]): Will force SerpApi to fetch the Google Scholar Profiles results even if a cached version is already present. Defaults to False. |
|
_async (Optional[bool]): Defines the way you want to submit your search to SerpApi. Defaults to False. |
|
output (Optional[str]): Defines the final output you want. It can be set to 'json' (default) to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'. |
|
|
|
Returns: |
|
:class:`dict`: author id |
|
* author_id: the author_id of the author |
|
""" |
|
return super().get_author_id(mauthors, hl, after_author, before_author, no_cache, _async, output) |
|
|