wikifit / src /wikimedia.py
KarthikGarlapati's picture
Upload 9 files
b70c076 verified
"""
WikiFit - Wikimedia API Integration Module
This module provides functions to interact with various Wikimedia APIs
to retrieve health and fitness information.
"""
import requests
import logging
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Cache durations (in seconds)
CACHE_TTL = 3600 # 1 hour
def get_wikipedia_summary(term):
"""
Get a summary of a topic from Wikipedia.
Args:
term: The search term/topic
Returns:
str: Summary text or error message
"""
try:
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{term}"
response = requests.get(url, timeout=10)
if response.status_code == 200:
data = response.json()
extract = data.get("extract", "")
if not extract:
# Check if we have an alternative like disambiguation
if data.get("type") == "disambiguation":
return f"'{term}' refers to multiple topics. Please try a more specific search term."
return "No summary found. This topic might not have an article on Wikipedia yet."
return extract
elif response.status_code == 404:
return f"The topic '{term}' was not found on Wikipedia. Please check spelling or try another term."
else:
logging.error(f"Wikipedia API error: {response.status_code} for term '{term}'")
return f"Error retrieving information: HTTP {response.status_code}"
except requests.RequestException as e:
logging.error(f"Wikipedia request error for '{term}': {str(e)}")
return "Connection error. Please check your internet connection and try again later."
def get_wiktionary_definition(term):
"""Get word definitions from Wiktionary"""
try:
url = "https://en.wiktionary.org/w/api.php"
params = {
"action": "query",
"format": "json",
"titles": term,
"prop": "extracts",
"exsectionformat": "plain",
"exsentences": 5,
"explaintext": True
}
response = requests.get(url, params=params, timeout=10)
if response.status_code == 200:
data = response.json()
pages = data.get("query", {}).get("pages", {})
# Extract the first page content (there should only be one)
for page_id in pages:
if "extract" in pages[page_id]:
return pages[page_id]["extract"]
return "No definition found."
else:
return f"Error retrieving definition: HTTP {response.status_code}"
except requests.RequestException as e:
logging.error(f"Wiktionary request error: {str(e)}")
return "Connection error. Please try again later."
def get_wikiquote_quotes(term):
"""Get quotes related to a topic from Wikiquote"""
try:
url = "https://en.wikiquote.org/w/api.php"
params = {
"action": "query",
"format": "json",
"titles": term,
"prop": "extracts",
"exsentences": 5,
"explaintext": True
}
response = requests.get(url, params=params, timeout=10)
if response.status_code == 200:
data = response.json()
pages = data.get("query", {}).get("pages", {})
# Extract the first page content (there should only be one)
for page_id in pages:
if int(page_id) > 0 and "extract" in pages[page_id]: # Skip missing pages
content = pages[page_id]["extract"].strip()
if content:
return content
return "No quotes found for this topic."
else:
return f"Error retrieving quotes: HTTP {response.status_code}"
except requests.RequestException as e:
logging.error(f"Wikiquote request error: {str(e)}")
return "Connection error. Please try again later."
def get_wikibooks_content(term):
"""Get educational content from Wikibooks"""
try:
url = "https://en.wikibooks.org/w/api.php"
params = {
"action": "query",
"format": "json",
"titles": term,
"prop": "extracts",
"exsentences": 10,
"explaintext": True
}
response = requests.get(url, params=params, timeout=10)
if response.status_code == 200:
data = response.json()
pages = data.get("query", {}).get("pages", {})
# Extract the first page content
for page_id in pages:
if int(page_id) > 0 and "extract" in pages[page_id]:
return pages[page_id]["extract"]
return "No Wikibooks content found for this topic."
else:
return f"Error retrieving content: HTTP {response.status_code}"
except requests.RequestException as e:
logging.error(f"Wikibooks request error: {str(e)}")
return "Connection error. Please try again later."
def get_wikimedia_commons_images(term, limit=5):
"""Get relevant images from Wikimedia Commons"""
try:
url = "https://commons.wikimedia.org/w/api.php"
params = {
"action": "query",
"format": "json",
"list": "search",
"srsearch": f"{term} haswbstatement:P180={term}", # Search for images with the term as subject
"srnamespace": 6, # File namespace
"srlimit": limit
}
response = requests.get(url, params=params, timeout=10)
if response.status_code == 200:
data = response.json()
search_results = data.get("query", {}).get("search", [])
image_titles = []
for result in search_results:
if "title" in result:
image_titles.append(result["title"])
# If we found images, get their URLs
image_data = []
if image_titles:
file_titles = "|".join(image_titles)
image_params = {
"action": "query",
"format": "json",
"titles": file_titles,
"prop": "imageinfo",
"iiprop": "url|extmetadata",
"iiurlwidth": 300 # Thumbnail width
}
img_response = requests.get(url, params=image_params, timeout=10)
if img_response.status_code == 200:
img_data = img_response.json()
pages = img_data.get("query", {}).get("pages", {})
for page_id in pages:
page = pages[page_id]
if "imageinfo" in page and page["imageinfo"]:
info = page["imageinfo"][0]
title = page.get("title", "").replace("File:", "")
thumb_url = info.get("thumburl", "")
description = info.get("extmetadata", {}).get("ImageDescription", {}).get("value", "")
# Clean HTML from description
description = description.replace("<p>", "").replace("</p>", "")
if thumb_url:
image_data.append({
"title": title,
"url": thumb_url,
"description": description
})
return image_data
else:
logging.error(f"Wikimedia Commons API error: {response.status_code} for term '{term}'")
return []
except requests.RequestException as e:
logging.error(f"Wikimedia Commons request error: {str(e)}")
return []
def get_wikisource_texts(term):
"""Get health-related texts from Wikisource"""
try:
url = "https://en.wikisource.org/w/api.php"
params = {
"action": "query",
"format": "json",
"list": "search",
"srsearch": term,
"srlimit": 3
}
response = requests.get(url, params=params, timeout=10)
if response.status_code == 200:
data = response.json()
search_results = data.get("query", {}).get("search", [])
text_data = []
for result in search_results:
title = result.get("title", "")
snippet = result.get("snippet", "").replace("<span class=\"searchmatch\">", "").replace("</span>", "")
text_data.append({
"title": title,
"snippet": snippet
})
return text_data
else:
logging.error(f"Wikisource API error: {response.status_code} for term '{term}'")
return []
except requests.RequestException as e:
logging.error(f"Wikisource request error: {str(e)}")
return []
def get_wikiversity_resources(term):
"""Get educational resources from Wikiversity"""
try:
url = "https://en.wikiversity.org/w/api.php"
params = {
"action": "query",
"format": "json",
"titles": term,
"prop": "extracts",
"exsentences": 5,
"explaintext": True
}
response = requests.get(url, params=params, timeout=10)
if response.status_code == 200:
data = response.json()
pages = data.get("query", {}).get("pages", {})
# Extract the first page content
for page_id in pages:
if int(page_id) > 0 and "extract" in pages[page_id]:
return pages[page_id]["extract"]
return "No Wikiversity resources found for this topic."
else:
return f"Error retrieving resources: HTTP {response.status_code}"
except requests.RequestException as e:
logging.error(f"Wikiversity request error: {str(e)}")
return "Connection error. Please try again later."
def get_wikispecies_info(species_name):
"""Get species information from Wikispecies"""
try:
url = "https://species.wikimedia.org/w/api.php"
params = {
"action": "query",
"format": "json",
"titles": species_name,
"prop": "extracts",
"explaintext": True
}
response = requests.get(url, params=params, timeout=10)
if response.status_code == 200:
data = response.json()
pages = data.get("query", {}).get("pages", {})
# Extract the first page content
for page_id in pages:
if int(page_id) > 0 and "extract" in pages[page_id]:
return pages[page_id]["extract"]
return "No species information found."
else:
return f"Error retrieving species information: HTTP {response.status_code}"
except requests.RequestException as e:
logging.error(f"Wikispecies request error: {str(e)}")
return "Connection error. Please try again later."
def get_wikidata_health_info(term):
"""Get structured health data from Wikidata"""
try:
# First, find the Wikidata ID for the term
url = "https://www.wikidata.org/w/api.php"
params = {
"action": "wbsearchentities",
"format": "json",
"search": term,
"language": "en"
}
response = requests.get(url, params=params, timeout=10)
if response.status_code == 200:
data = response.json()
search_results = data.get("search", [])
if not search_results:
return "No Wikidata information found for this term."
# Get the first result's ID
entity_id = search_results[0].get("id")
# Now get the entity data
entity_params = {
"action": "wbgetentities",
"format": "json",
"ids": entity_id,
"languages": "en"
}
entity_response = requests.get(url, params=entity_params, timeout=10)
if entity_response.status_code == 200:
entity_data = entity_response.json()
entities = entity_data.get("entities", {})
if entity_id in entities:
entity = entities[entity_id]
# Extract label and description
label = entity.get("labels", {}).get("en", {}).get("value", "No label")
description = entity.get("descriptions", {}).get("en", {}).get("value", "No description")
# Extract some claims/properties
claims = entity.get("claims", {})
properties = {}
# Common health-related properties
property_map = {
"P2175": "medical condition treated",
"P2176": "drug used for treatment",
"P780": "symptoms",
"P1050": "medical condition",
"P1995": "health specialty"
}
for prop_id, prop_name in property_map.items():
if prop_id in claims:
values = []
for claim in claims[prop_id]:
mainsnak = claim.get("mainsnak", {})
if mainsnak.get("datatype") == "wikibase-item" and "datavalue" in mainsnak:
value_id = mainsnak["datavalue"]["value"]["id"]
values.append(value_id)
if values:
properties[prop_name] = values
return {
"label": label,
"description": description,
"properties": properties
}
return "No detailed Wikidata information available."
else:
logging.error(f"Wikidata API error: {response.status_code} for term '{term}'")
return f"Error retrieving Wikidata: HTTP {response.status_code}"
except requests.RequestException as e:
logging.error(f"Wikidata request error: {str(e)}")
return "Connection error. Please try again later."
# Add a unified search function to search across all Wikimedia platforms
def search_all_wikimedia(term):
"""
Search for a term across all Wikimedia platforms.
Args:
term: Search term
Returns:
dict: Results from all Wikimedia sources
"""
# Normalize the term
search_term = term.strip().replace(" ", "_")
# Create a results dictionary
results = {
"wikipedia": None,
"wiktionary": None,
"wikiquote": None,
"wikibooks": None,
"commons": None,
"wikisource": None,
"wikiversity": None,
"wikispecies": None,
"wikidata": None
}
# Get results from each platform
results["wikipedia"] = get_wikipedia_summary(search_term)
results["wiktionary"] = get_wiktionary_definition(search_term)
results["wikiquote"] = get_wikiquote_quotes(search_term)
results["wikibooks"] = get_wikibooks_content(search_term)
results["commons"] = get_wikimedia_commons_images(search_term)
results["wikisource"] = get_wikisource_texts(search_term)
results["wikiversity"] = get_wikiversity_resources(search_term)
results["wikispecies"] = get_wikispecies_info(search_term)
results["wikidata"] = get_wikidata_health_info(search_term)
return results