Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Semantic Scholar integration #50

Merged
merged 3 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions App_Function_Libraries/Gradio_Related.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from App_Function_Libraries.Gradio_UI.RAG_Chat_tab import create_rag_tab
from App_Function_Libraries.Gradio_UI.Embeddings_tab import create_embeddings_tab, create_view_embeddings_tab, \
create_purge_embeddings_tab
from App_Function_Libraries.Gradio_UI.Semantic_Scholar_tab import create_semantic_scholar_tab
from App_Function_Libraries.Gradio_UI.Trash import create_view_trash_tab, create_empty_trash_tab, \
create_delete_trash_tab, create_search_and_mark_trash_tab
from App_Function_Libraries.Gradio_UI.Utilities import create_utilities_yt_timestamp_tab, create_utilities_yt_audio_tab, \
Expand Down Expand Up @@ -287,6 +288,7 @@ def launch_ui(share_public=None, server_mode=False):
create_summarize_explain_tab()
create_live_recording_tab()
create_arxiv_tab()
create_semantic_scholar_tab()

with gr.TabItem("Text Search", id="text search", visible=True):
create_search_tab()
Expand Down
184 changes: 184 additions & 0 deletions App_Function_Libraries/Gradio_UI/Semantic_Scholar_tab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
# Semantic_Scholar_tab.py
# Description: contains the code to create the Semantic Scholar tab in the Gradio UI.
#
# Imports
#
# External Libraries
import gradio as gr
#
# Internal Libraries
from App_Function_Libraries.Third_Party.Semantic_Scholar import search_and_display, FIELDS_OF_STUDY, PUBLICATION_TYPES


#
######################################################################################################################
# Functions
def create_semantic_scholar_tab():
    """Create the Semantic Scholar tab for the Gradio UI.

    Builds the help text, the search form (query plus optional filters), the
    pagination controls and the results area, then wires the buttons:

    * "Search" always starts a new search from page 0.
    * "Previous"/"Next" first move the (clamped) page counter and then re-run
      the search for that page with the current filter values.

    Must be called inside an active ``gr.Blocks``/tab context; returns nothing.
    """
    with gr.Tab("Semantic Scholar Search"):
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("""
                ## Semantic Scholar Paper Search

                This interface allows you to search for academic papers using the Semantic Scholar API with advanced filtering options:

                ### Search Options
                - **Keywords**: Search across titles, abstracts, and other paper content
                - **Year Range**: Filter papers by publication year (e.g., "2020-2023" or "2020")
                - **Venue**: Filter by publication venue (journal or conference)
                - **Minimum Citations**: Filter papers by minimum citation count
                - **Fields of Study**: Filter papers by academic field
                - **Publication Types**: Filter by type of publication
                - **Open Access**: Option to show only papers with free PDF access

                ### Results Include
                - Paper title
                - Author list
                - Publication year and venue
                - Citation count
                - Publication types
                - Abstract
                - Links to PDF (when available) and Semantic Scholar page
                """)
            with gr.Column(scale=2):
                gr.Markdown("""
                ### Pagination
                - 10 results per page
                - Navigate through results using Previous/Next buttons
                - Current page number and total results displayed

                ### Usage Tips
                - Combine multiple filters for more specific results
                - Use specific terms for more focused results
                - Try different combinations of filters if you don't find what you're looking for
                """)
        with gr.Row():
            with gr.Column(scale=2):
                search_input = gr.Textbox(
                    label="Search Query",
                    placeholder="Enter keywords to search for papers...",
                    lines=1
                )

                # Advanced search options
                with gr.Row():
                    year_range = gr.Textbox(
                        label="Year Range",
                        placeholder="e.g., 2020-2023 or 2020",
                        lines=1
                    )
                    venue = gr.Textbox(
                        label="Venue",
                        placeholder="e.g., Nature, Science",
                        lines=1
                    )
                    # precision=0 makes the component hand an int to the
                    # callback instead of a float such as 5.0.
                    min_citations = gr.Number(
                        label="Minimum Citations",
                        value=0,
                        minimum=0,
                        step=1,
                        precision=0
                    )

                with gr.Row():
                    fields_of_study = gr.Dropdown(
                        choices=FIELDS_OF_STUDY,
                        label="Fields of Study",
                        multiselect=True,
                        value=[]
                    )
                    publication_types = gr.Dropdown(
                        choices=PUBLICATION_TYPES,
                        label="Publication Types",
                        multiselect=True,
                        value=[]
                    )

                open_access_only = gr.Checkbox(
                    label="Open Access Only",
                    value=False
                )

            with gr.Column(scale=1):
                search_button = gr.Button("Search", variant="primary")

                # Pagination controls
                with gr.Row():
                    prev_button = gr.Button("← Previous")
                    current_page = gr.Number(
                        value=0, label="Page", minimum=0, step=1, precision=0
                    )
                    # Hidden state: highest valid (0-based) page index of the
                    # last search, used to clamp the "Next" button.
                    max_page = gr.Number(
                        value=0, label="Max Page", visible=False, precision=0
                    )
                    next_button = gr.Button("Next →")

                total_results = gr.Textbox(
                    label="Total Results",
                    value="0",
                    interactive=False
                )

        output_text = gr.Markdown(
            label="Results",
            value="Use the search options above to find papers."
        )

        def update_page(direction, current, maximum):
            """Return ``current + direction`` clamped to ``[0, maximum]``.

            A cleared number field arrives as ``None``; treat it as 0 rather
            than failing with a TypeError.
            """
            current = int(current or 0)
            maximum = max(int(maximum or 0), 0)
            return max(0, min(current + direction, maximum))

        def go_to_previous_page(current, maximum):
            """Step one page back (never below page 0)."""
            return update_page(-1, current, maximum)

        def go_to_next_page(current, maximum):
            """Step one page forward (never past the last page)."""
            return update_page(1, current, maximum)

        # Handle search and pagination
        def search_from_button(query, fields_of_study, publication_types, year_range, venue, min_citations,
                               open_access_only):
            """Wrapper to always search from page 0 when search button is clicked"""
            return search_and_display(
                query=query,
                page=0,  # Force page 0 for new searches
                fields_of_study=fields_of_study,
                publication_types=publication_types,
                year_range=year_range,
                venue=venue,
                min_citations=min_citations,
                open_access_only=open_access_only
            )

        # Component lists shared by the event handlers below. The order of
        # `paged_inputs` mirrors the positional signature of search_and_display.
        filter_inputs = [
            fields_of_study, publication_types,
            year_range, venue, min_citations, open_access_only
        ]
        paged_inputs = [search_input, current_page] + filter_inputs
        search_outputs = [output_text, current_page, max_page, total_results]

        search_button.click(
            fn=search_from_button,
            inputs=[search_input] + filter_inputs,
            outputs=search_outputs
        )

        # Previous/Next share the same two-step flow: move the page counter,
        # then re-run the search for the new page.
        for button, step_fn in (
            (prev_button, go_to_previous_page),
            (next_button, go_to_next_page),
        ):
            button.click(
                fn=step_fn,
                inputs=[current_page, max_page],
                outputs=current_page
            ).then(
                fn=search_and_display,
                inputs=paged_inputs,
                outputs=search_outputs
            )

#
# End of Semantic_Scholar_tab.py
######################################################################################################################
162 changes: 162 additions & 0 deletions App_Function_Libraries/Third_Party/Semantic_Scholar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# Semantic_Scholar.py
# Description: This file contains the functions to interact with the Semantic Scholar API
#
# Imports
from typing import List, Dict, Any

import requests
#
####################################################################################################
#
# Functions

# Constants
# Choices offered in the "Fields of Study" dropdown; the selected values are
# joined with commas and sent as the API's `fieldsOfStudy` filter.
FIELDS_OF_STUDY = [
    "Computer Science",
    "Medicine",
    "Chemistry",
    "Biology",
    "Materials Science",
    "Physics",
    "Geology",
    "Psychology",
    "Art",
    "History",
    "Geography",
    "Sociology",
    "Business",
    "Political Science",
    "Economics",
    "Philosophy",
    "Mathematics",
    "Engineering",
    "Environmental Science",
    "Agricultural and Food Sciences",
    "Education",
    "Law",
    "Linguistics",
]

# Choices offered in the "Publication Types" dropdown; the selected values are
# joined with commas and sent as the API's `publicationTypes` filter.
PUBLICATION_TYPES = [
    "Review",
    "JournalArticle",
    "CaseReport",
    "ClinicalTrial",
    "Conference",
    "Dataset",
    "Editorial",
    "LettersAndComments",
    "MetaAnalysis",
    "News",
    "Study",
    "Book",
    "BookSection",
]


def _build_search_params(
        query,
        page,
        fields_of_study,
        publication_types,
        year_range,
        venue,
        min_citations,
        open_access_only,
        limit
) -> Dict[str, Any]:
    """Translate the UI filter values into query parameters for the paper search endpoint."""
    # UI number widgets may deliver floats or None; the API needs whole numbers.
    limit = int(limit)
    page = max(int(page or 0), 0)

    params = {
        "query": query,
        "offset": page * limit,
        "limit": limit,
        "fields": "title,abstract,year,citationCount,authors,venue,openAccessPdf,url,publicationTypes,publicationDate"
    }

    # Add optional filters (each is omitted entirely when empty/unset)
    if fields_of_study:
        params["fieldsOfStudy"] = ",".join(fields_of_study)
    if publication_types:
        params["publicationTypes"] = ",".join(publication_types)
    if venue:
        params["venue"] = venue
    if min_citations:
        params["minCitationCount"] = str(int(min_citations))
    if open_access_only:
        # Sent as a bare flag: the parameter's presence is the filter.
        params["openAccessPdf"] = ""

    year_range = (year_range or "").strip()
    if year_range:
        # Accept "2020" or "2020-2023" (either side may be padded with spaces).
        # Anything with more than one hyphen is malformed and is ignored so
        # the search still runs without a year filter.
        year_parts = [part.strip() for part in year_range.split("-")]
        if len(year_parts) <= 2:
            params["year"] = "-".join(year_parts)

    return params


def search_papers(
        query: str,
        page: int,
        fields_of_study: List[str],
        publication_types: List[str],
        year_range: str,
        venue: str,
        min_citations: int,
        open_access_only: bool,
        limit: int = 10,
        timeout: float = 30.0
) -> Dict[str, Any]:
    """Search for papers using the Semantic Scholar API with all available filters.

    Args:
        query: Free-text search terms. Empty, whitespace-only or ``None``
            short-circuits to an empty result without any network call.
        page: Zero-based page index; converted to ``offset = page * limit``.
        fields_of_study: Field names to filter by (may be empty/``None``).
        publication_types: Publication types to filter by (may be empty/``None``).
        year_range: ``"YYYY"`` or ``"YYYY-YYYY"``; malformed values are ignored.
        venue: Venue name filter (may be empty).
        min_citations: Minimum citation count; 0/``None`` disables the filter.
        open_access_only: When true, request only papers with an open-access PDF.
        limit: Number of results per page.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The decoded JSON response on success. On a request failure (network
        error, timeout, non-2xx status, undecodable body) a dict with an
        ``"error"`` message plus empty ``total``/``offset``/``data`` fields.
    """
    if not query or not query.strip():
        return {"total": 0, "offset": 0, "next": 0, "data": []}

    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = _build_search_params(
        query, page, fields_of_study, publication_types,
        year_range, venue, min_citations, open_access_only, limit
    )

    try:
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an invalid JSON body on older `requests` releases.
        return {"error": f"API Error: {str(e)}", "total": 0, "offset": 0, "data": []}


def get_paper_details(paper_id, timeout: float = 30.0):
    """Get detailed information about a specific paper.

    Args:
        paper_id: Identifier appended to the ``/graph/v1/paper/`` endpoint path.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The decoded JSON response on success, or ``{"error": "API Error: ..."}``
        when the request fails (network error, timeout, non-2xx status,
        undecodable body).
    """
    url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}"
    params = {
        "fields": "title,abstract,year,citationCount,authors,venue,openAccessPdf,url,references,citations"
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an invalid JSON body on older `requests` releases.
        return {"error": f"API Error: {str(e)}"}


def format_paper_info(paper: Dict[str, Any]) -> str:
    """Format paper information for display.

    Builds a Markdown snippet with the title as a heading, followed by
    authors, year/venue/citation count, publication types, abstract and links.

    Fields may be missing *or* present with a ``None``/empty value;
    ``dict.get(key, default)`` only covers the former, so every field is
    normalised with ``or`` to avoid printing "None" or crashing on
    ``', '.join(None)``.

    Args:
        paper: One paper record (a dict) as found in a search response's
            ``data`` list.

    Returns:
        The formatted Markdown text, terminated by a newline.
    """
    authors = ", ".join(
        (author or {}).get("name") or "Unknown"
        for author in (paper.get("authors") or [])
    )
    year = f"Year: {paper.get('year') or 'N/A'}"
    venue = f"Venue: {paper.get('venue') or 'N/A'}"
    citations = f"Citations: {paper.get('citationCount') or 0}"
    pub_types = f"Types: {', '.join(paper.get('publicationTypes') or ['N/A'])}"

    # Only show a PDF link when an actual URL is present.
    pdf_url = (paper.get("openAccessPdf") or {}).get("url")
    pdf_link = f"\nPDF: {pdf_url}" if pdf_url else ""

    s2_link = f"\nSemantic Scholar: {paper.get('url') or ''}"

    lines = [
        f"# {paper.get('title') or 'No Title'}",
        "",
        f"Authors: {authors}",
        f"{year} | {venue} | {citations}",
        pub_types,
        "",
        "Abstract:",
        paper.get("abstract") or "No abstract available",
        "",
        f"Links:{pdf_link}{s2_link}",
        "",  # trailing newline
    ]
    return "\n".join(lines)


def search_and_display(
        query: str,
        page: int,
        fields_of_study: List[str],
        publication_types: List[str],
        year_range: str,
        venue: str,
        min_citations: int,
        open_access_only: bool
) -> tuple[str, int, int, str]:
    """Search for papers and return formatted results with pagination info.

    Args:
        query: Free-text search terms.
        page: Zero-based page index (floats/``None`` from UI widgets are
            normalised to a non-negative int).
        fields_of_study, publication_types, year_range, venue, min_citations,
        open_access_only: Filters passed straight through to ``search_papers``.

    Returns:
        A 4-tuple ``(markdown, page, max_page, total)``:
        the rendered results (or an error / "No results found." message),
        the zero-based page shown, the highest valid zero-based page index,
        and the total hit count as a string. On error or when nothing is
        found the numeric fields are reset to 0 / "0".
    """
    # Single source of truth for the page size: used for the request and for
    # the page arithmetic below so the two can never drift apart.
    page_size = 10
    # NOTE(review): the relevance-search endpoint is documented as returning
    # at most 1,000 results, so pages beyond that are unreachable - confirm
    # against the current API documentation.
    max_reachable_results = 1000

    page = max(int(page or 0), 0)

    result = search_papers(
        query, page, fields_of_study, publication_types,
        year_range, venue, min_citations, open_access_only,
        limit=page_size
    )

    if "error" in result:
        return result["error"], 0, 0, "0"

    # .get(): tolerate a response that omits "data" instead of raising KeyError.
    papers = result.get("data") or []
    if not papers:
        return "No results found.", 0, 0, "0"

    total_results = int(result.get("total") or 0)
    reachable = min(total_results, max_reachable_results)
    max_pages = (reachable + page_size - 1) // page_size  # Ceiling division

    offset = result.get("offset")
    if offset is None:
        offset = page * page_size

    formatted_results = "\n\n---\n\n".join(
        format_paper_info(paper) for paper in papers
    )

    # Add pagination information
    pagination_info = f"\n\n---\n\nShowing results {offset + 1}-{offset + len(papers)} of {total_results}"

    # max_pages counts pages; the UI wants the highest zero-based index.
    return formatted_results + pagination_info, page, max(max_pages - 1, 0), str(total_results)

#
# End of Semantic_Scholar.py
####################################################################################################
Loading