mullvad p1
core/web_search.py
@@ -1,9 +1,270 @@
 import webbrowser
 import requests
+from typing import Optional, Dict, List, Any
 from urllib.parse import urlencode
 from bs4 import BeautifulSoup
 from core.headers import get_useragent
 
-url = "http://frogfind.com/?q="
-
-def search(query: str):
-    headers = {
-        "User-Agent": get_useragent()
+class MullvadLetaWrapper:
+    """Wrapper for the Mullvad Leta privacy-focused search engine."""
+
+    BASE_URL = "https://leta.mullvad.net/search"
+
+    # Available search engines
+    ENGINES = ["brave", "google"]
+
+    # Available countries (from the HTML)
+    COUNTRIES = [
+        "ar", "au", "at", "be", "br", "ca", "cl", "cn", "dk", "fi",
+        "fr", "de", "hk", "in", "id", "it", "jp", "kr", "my", "mx",
+        "nl", "nz", "no", "ph", "pl", "pt", "ru", "sa", "za", "es",
+        "se", "ch", "tw", "tr", "uk", "us"
+    ]
+
+    # Available languages
+    LANGUAGES = [
+        "ar", "bg", "ca", "zh-hans", "zh-hant", "hr", "cs", "da", "nl",
+        "en", "et", "fi", "fr", "de", "he", "hu", "is", "it", "jp",
+        "ko", "lv", "lt", "nb", "pl", "pt", "ro", "ru", "sr", "sk",
+        "sl", "es", "sv", "tr"
+    ]
+
+    # Time filters
+    TIME_FILTERS = ["d", "w", "m", "y"]  # day, week, month, year
+
+    def __init__(self, engine: str = "brave"):
+        """
+        Initialize the Mullvad Leta wrapper.
+
+        Args:
+            engine: Search engine to use ("brave" or "google")
+        """
+        if engine not in self.ENGINES:
+            raise ValueError(f"Engine must be one of {self.ENGINES}")
+
+        self.engine = engine
+        self.session = requests.Session()
+
+    def _get_headers(self) -> Dict[str, str]:
+        """Get request headers with user agent."""
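+        # All values below are static, Chromium-style client hints;
+        # only the user-agent comes from get_useragent().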
+        return {
+            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
+            "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
+            "cache-control": "max-age=0",
+            "sec-ch-ua": '"Chromium";v="140", "Not=A?Brand";v="24"',
+            "sec-ch-ua-mobile": "?0",
+            "sec-ch-ua-platform": '"Linux"',
+            "sec-fetch-dest": "document",
+            "sec-fetch-mode": "navigate",
+            "sec-fetch-site": "same-origin",
+            "sec-fetch-user": "?1",
+            "upgrade-insecure-requests": "1",
+            "user-agent": get_useragent()
+        }
+
+    def search(
+        self,
+        query: str,
+        country: Optional[str] = None,
+        language: Optional[str] = None,
+        last_updated: Optional[str] = None,
+        page: int = 1
+    ) -> Dict[str, Any]:
+        """
+        Perform a search on Mullvad Leta.
+
+        Args:
+            query: Search query string
+            country: Country code filter (e.g., "us", "uk")
+            language: Language code filter (e.g., "en", "fr")
+            last_updated: Time filter ("d", "w", "m", "y")
+            page: Page number (default: 1)
+
+        Returns:
+            Dictionary containing search results and metadata
+        """
+        if country and country not in self.COUNTRIES:
+            raise ValueError(f"Invalid country code. Must be one of {self.COUNTRIES}")
+
+        if language and language not in self.LANGUAGES:
+            raise ValueError(f"Invalid language code. Must be one of {self.LANGUAGES}")
+
+        if last_updated and last_updated not in self.TIME_FILTERS:
+            raise ValueError(f"Invalid time filter. Must be one of {self.TIME_FILTERS}")
+
+        # Build query parameters
+        params = {
+            "q": query,
+            "engine": self.engine
+        }
+
+        if country:
+            params["country"] = country
+        if language:
+            params["language"] = language
+        if last_updated:
+            params["lastUpdated"] = last_updated
+        if page > 1:
+            params["page"] = str(page)
+
+        # Set cookie for engine preference
+        cookies = {"engine": self.engine}
+
+        # Make request
+        response = self.session.get(
+            self.BASE_URL,
+            params=params,
+            headers=self._get_headers(),
+            cookies=cookies,
+            timeout=10
+        )
+        response.raise_for_status()
+
+        # Parse results
+        return self._parse_results(response.text, query, page)
+
+    def _parse_results(self, html: str, query: str, page: int) -> Dict[str, Any]:
+        """
+        Parse HTML response and extract search results.
+
+        Args:
+            html: HTML response content
+            query: Original search query
+            page: Current page number
+
+        Returns:
+            Dictionary containing parsed results
+        """
+        soup = BeautifulSoup(html, 'html.parser')
+
+        results = {
+            "query": query,
+            "page": page,
+            "engine": self.engine,
+            "results": [],
+            "infobox": None,
+            "news": [],
+            "cached": False
+        }
+
+        # Check if cached
+        cache_notice = soup.find('p', class_='small')
+        if cache_notice and 'cached' in cache_notice.text.lower():
+            results["cached"] = True
+
+        # Extract regular search results
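+        # NOTE: svelte-fmlk7p is a build-generated scoped-style class,
+        # so this selector may break when the site is redeployed.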
+        articles = soup.find_all('article', class_='svelte-fmlk7p')
+        for article in articles:
+            result = self._parse_article(article)
+            if result:
+                results["results"].append(result)
+
+        # Extract infobox if present
+        infobox_div = soup.find('div', class_='infobox')
+        if infobox_div:
+            results["infobox"] = self._parse_infobox(infobox_div)
+
+        # Extract news results
+        news_div = soup.find('div', class_='news')
+        if news_div:
+            news_articles = news_div.find_all('article')
+            for article in news_articles:
+                news_item = self._parse_news_article(article)
+                if news_item:
+                    results["news"].append(news_item)
+
+        # Check for next page
+        next_button = soup.find('button', {'data-cy': 'next-button'})
+        results["has_next_page"] = next_button is not None
+
+        return results
+
+    def _parse_article(self, article) -> Optional[Dict[str, str]]:
+        """Parse a single search result article."""
+        try:
+            link_tag = article.find('a', href=True)
+            if not link_tag:
+                return None
+
+            title_tag = article.find('h3')
+            snippet_tag = article.find('p', class_='result__body')
+            cite_tag = article.find('cite')
+
+            return {
+                "url": link_tag['href'],
+                "title": title_tag.get_text(strip=True) if title_tag else "",
+                "snippet": snippet_tag.get_text(strip=True) if snippet_tag else "",
+                "display_url": cite_tag.get_text(strip=True) if cite_tag else ""
+            }
+        except Exception as e:
+            print(f"Error parsing article: {e}")
+            return None
+
+    def _parse_infobox(self, infobox_div) -> Dict[str, Any]:
+        """Parse infobox information."""
+        infobox = {}
+
+        title_tag = infobox_div.find('h1')
+        if title_tag:
+            infobox["title"] = title_tag.get_text(strip=True)
+
+        subtitle_tag = infobox_div.find('h2')
+        if subtitle_tag:
+            infobox["subtitle"] = subtitle_tag.get_text(strip=True)
+
+        url_tag = infobox_div.find('a', rel='noreferrer')
+        if url_tag:
+            infobox["url"] = url_tag['href']
+
+        desc_tag = infobox_div.find('p')
+        if desc_tag:
+            infobox["description"] = desc_tag.get_text(strip=True)
+
+        return infobox
+
+    def _parse_news_article(self, article) -> Optional[Dict[str, str]]:
+        """Parse a news article."""
+        try:
+            link_tag = article.find('a', href=True)
+            if not link_tag:
+                return None
+
+            title_tag = link_tag.find('h3')
+            cite_tag = link_tag.find('cite')
+            time_tag = link_tag.find('time')
+
+            return {
+                "url": link_tag['href'],
+                "title": title_tag.get_text(strip=True) if title_tag else "",
+                "source": cite_tag.get_text(strip=True) if cite_tag else "",
+                "timestamp": time_tag['datetime'] if time_tag and time_tag.has_attr('datetime') else ""
+            }
+        except Exception as e:
+            print(f"Error parsing news article: {e}")
+            return None
+
+
+# Example usage
+if __name__ == "__main__":
+    # Create wrapper instance
+    leta = MullvadLetaWrapper(engine="brave")
+
+    # Perform a search
+    results = leta.search("python programming", country="us", language="en")
+
+    # Display results
+    print(f"Query: {results['query']}")
+    print(f"Engine: {results['engine']}")
+    print(f"Cached: {results['cached']}")
+    print(f"\nFound {len(results['results'])} results:\n")
+
+    for i, result in enumerate(results['results'][:5], 1):
+        print(f"{i}. {result['title']}")
+        print(f"   URL: {result['url']}")
+        print(f"   {result['snippet'][:100]}...\n")
+
+    if results['news']:
+        print(f"\nNews ({len(results['news'])} items):")
+        for news in results['news'][:3]:
+            print(f"- {news['title']}")
+            print(f"  {news['source']}\n")
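A quick pagination sketch (hypothetical usage, not part of this commit): search() accepts a page argument and _parse_results sets has_next_page on every result dict, so walking result pages can look like this, assuming core.headers.get_useragent is importable:

leta = MullvadLetaWrapper(engine="brave")
page = 1
while page <= 3:  # arbitrary cap for the example
    data = leta.search("python programming", page=page)
    for item in data["results"]:
        print(item["title"], "-", item["url"])
    if not data["has_next_page"]:
        break
    page += 1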
main.py
@@ -4,11 +4,11 @@ from pathlib import Path
 from PySide6 import QtCore, QtGui, QtWidgets
 
 from core.file_search import find
-from core.web_search import search
+from core.web_search import MullvadLetaWrapper
 
 ASSET = Path(__file__).parent / "assets" / "2ktan.png"
 
-class SearchResultsDialog(QtWidgets.QDialog):
+class FileSearchResults(QtWidgets.QDialog):
     def __init__(self, results, parent=None):
         super().__init__(parent)
         self.setWindowTitle("Search Results")
@@ -115,7 +115,7 @@ class MainWindow(QtWidgets.QMainWindow):
         QtWidgets.QApplication.restoreOverrideCursor()
 
         if results:
-            self.results_dialog = SearchResultsDialog(results, self)
+            self.results_dialog = FileSearchResults(results, self)
             self.results_dialog.show()
         else:
             reply = QtWidgets.QMessageBox.question(self, "No Results", "Sorry, I couldn't find anything in your home folder. Would you like me to search the root folder?",
@@ -131,7 +131,7 @@ class MainWindow(QtWidgets.QMainWindow):
         QtWidgets.QApplication.restoreOverrideCursor()
 
         if results:
-            self.results_dialog = SearchResultsDialog(results, self)
+            self.results_dialog = FileSearchResults(results, self)
             self.results_dialog.show()
         else:
             QtWidgets.QMessageBox.information(self, "No Results", "Sorry, I couldn't find anything in the root folder either.")
@@ -141,7 +141,8 @@ class MainWindow(QtWidgets.QMainWindow):
         if ok and query:
             try:
                 QtWidgets.QApplication.setOverrideCursor(QtCore.Qt.WaitCursor) #type: ignore
-                search(query)
+                leta = MullvadLetaWrapper(engine="brave")
+                results = leta.search(query)
             except RuntimeError as e:
                 QtWidgets.QMessageBox.critical(self, "Search Error", str(e))
                 return
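One call-site caveat, sketched below as a hypothetical follow-up rather than part of this commit: MullvadLetaWrapper.search() raises ValueError for invalid filter values and requests exceptions for network failures or non-2xx responses (via raise_for_status()), none of which are RuntimeError, so the handler above will let them propagate. A broader catch could look like:

import requests

from core.web_search import MullvadLetaWrapper

def run_search(query: str):
    # ValueError covers an invalid engine/country/language/time filter;
    # requests.RequestException covers timeouts, connection errors, and
    # HTTP errors raised by raise_for_status().
    try:
        leta = MullvadLetaWrapper(engine="brave")
        return leta.search(query)
    except (ValueError, requests.RequestException) as e:
        print(f"Search failed: {e}")
        return None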