# wikiquote_scraper.py

import json
import re
import sys

import requests
  5. def get_wikiquote_quotes(person, limit=50):
  6. base_url = "https://en.wikiquote.org/w/api.php"
  7. # Step 1: Get the page ID
  8. search_params = {
  9. "action": "query",
  10. "format": "json",
  11. "list": "search",
  12. "srsearch": person
  13. }
  14. response = requests.get(base_url, params=search_params).json()
  15. if "query" not in response or not response["query"]["search"]:
  16. print("No results found for", person)
  17. return []
  18. page_title = response["query"]["search"][0]["title"]
  19. # Step 2: Get the page content in JSON format
  20. content_params = {
  21. "action": "query",
  22. "format": "json",
  23. "prop": "extracts",
  24. "explaintext": True,
  25. "titles": page_title
  26. }
  27. content_response = requests.get(base_url, params=content_params).json()
  28. pages = content_response.get("query", {}).get("pages", {})
  29. if not pages:
  30. print("Failed to retrieve content for", person)
  31. return []
  32. page_content = next(iter(pages.values())).get("extract", "")
  33. # Step 3: Extract quotes (lines longer than 150 characters)
  34. quotes = [line for line in page_content.split("\n") if len(line) > 150]
  35. return quotes[:limit] # Limit number of quotes returned
  36. def save_quotes_to_file(person, quotes):
  37. filename = f"{person.replace(' ', '_')}_quotes.txt"
  38. with open(filename, "w", encoding="utf-8") as file:
  39. for quote in quotes:
  40. file.write(f"{quote}\n\t\t-- {person}\n%\n")
  41. print(f"Saved {len(quotes)} quotes to {filename}")
  42. if __name__ == "__main__":
  43. if len(sys.argv) < 2:
  44. print("Usage: python script.py \"Person Name\"")
  45. sys.exit(1)
  46. person = sys.argv[1]
  47. quotes = get_wikiquote_quotes(person)
  48. if quotes:
  49. save_quotes_to_file(person, quotes)
  50. else:
  51. print("No quotes found.")