-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1.2 - January 2, 2024 - [new] Preferred language setting for DuckDuckGo results in config.php. - [new] Preferred language setting for Wikipedia results in config.php. - [new] Combined DuckDuckGo, Google, Wikipedia and Ecosia (Bing) results into one page. - [new] Ranking algorithm for search results. - [new] Option to down-rank certain social media sites in results (Makes them show lower down the page). - [new] Option to show the Goosle rank along with the search source. - [new] Crawler for results from Limetorrents.lol. - [new] Periodic check for updates in footer. - [change] Moved duckduckgo.php and google.php into the engines/search/ folder. - [change] Removed Wikipedia special search in favor of actual search results. - [change] Removed 'Date Added' from 1337x results. - [change] Removed Chrome based and Mobile user-agents, as they don't work for the Wikipedia API. - [change] Added more trackers for generating magnet links. - [tweak] 30-50% faster parsing of search results (couple of ms per search query). - [tweak] Expanded the season/episode filter to all sources that support TV Shows. - [tweak] More sensible sanitization of variables (Searching for html tags/basic code should now work). - [tweak] Moved 'imdb_id_search' out from special results into its 'own' setting. - [tweak] Moved 'password_generator' out from special results into its 'own' setting. - [tweak] More accurate and faster Google scrape. - [tweak] Reduced paragraph margins. - [tweak] More code cleanup, making it more uniform. - [fix] Prevents searching on disabled methods by 'cheating' the search type in the url. - [fix] Better decoding for special characters in urls for search results. - [fix] Better validation for special searches trigger words. - [fix] Better sanitization for DuckDuckGo and Google results.
- Loading branch information
Arnan de Gans
committed
Jan 2, 2024
1 parent
86d0d57
commit 92a70e6
Showing
24 changed files
with
1,244 additions
and
556 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
<?php | ||
/* ------------------------------------------------------------------------------------ | ||
* Goosle - A meta search engine for private and fast internet fun. | ||
* | ||
* COPYRIGHT NOTICE | ||
* Copyright 2023-2024 Arnan de Gans. All Rights Reserved. | ||
* | ||
* COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT. | ||
* By using this code you agree to indemnify Arnan de Gans from any | ||
* liability that might arise from its use. | ||
------------------------------------------------------------------------------------ */ | ||
/**
 * Engine request that scrapes image results from Yahoo! Images.
 *
 * get_request_url() builds the search URL (honouring an optional leading
 * "size:<value>" switch in the query) and parse_results() turns the returned
 * HTML into the result array.
 */
class YahooImageRequest extends EngineRequest {
    /**
     * Build the Yahoo! Images search URL for the current query.
     *
     * If the first query term has the form "size:<value>", it is treated as an
     * image size switch. Accepted values are small, medium (med), large (lrg)
     * and xlrg, which maps to "wallpaper". The switch term is removed from
     * $this->query whenever <value> is 3-6 characters long and not numeric,
     * even if it is not a recognised size. Note that the query is rebuilt from
     * the lowercased terms in that case.
     *
     * @return string Fully built request url.
     */
    public function get_request_url() {
        // Split the query
        $query_terms = explode(" ", strtolower($this->query));

        // Size override
        $size = "";
        // Match on the "size:" prefix so that index 1 of the split below always exists.
        if(strncmp($query_terms[0], "size:", 5) === 0) {
            $switch = explode(":", $query_terms[0]);
            $requested = $switch[1];
            $length = strlen($requested);

            // The spelled out "wallpaper" (9 characters) does not pass this
            // length gate; use the xlrg alias for that size.
            if($length >= 3 && $length <= 6 && !is_numeric($requested)) {
                $aliases = array("med" => "medium", "lrg" => "large", "xlrg" => "wallpaper");
                if(isset($aliases[$requested])) {
                    $requested = $aliases[$requested];
                }

                if(in_array($requested, array("small", "medium", "large", "wallpaper"), true)) {
                    $size = $requested;
                }

                // Drop the switch from the query that is sent to Yahoo!
                $this->query = implode(" ", array_slice($query_terms, 1));
            }
        }

        // p = query
        // imgsz = Image size (small|medium|large|wallpaper)
        $args = array("p" => $this->query, "imgsz" => $size);

        return "https://images.search.yahoo.com/search/images?".http_build_query($args);
    }

    /**
     * Scrape suggestions and image results out of a Yahoo! Images response.
     *
     * @param mixed $response Response body, passed as-is to get_xpath().
     * @return array Empty array if get_xpath() yields nothing. Otherwise an array with:
     *               'amount' (number of scraped result nodes),
     *               optional 'did_you_mean' and 'search_specific' suggestions,
     *               'search' (list of results) or 'error' when no result was usable.
     */
    public function parse_results($response) {
        $results = array();
        $xpath = get_xpath($response);

        if(!$xpath) return array();

        // Scrape recommended
        $didyoumean = $xpath->query(".//section[@class='dym-c']/section/h3/a")->item(0);
        if(!is_null($didyoumean)) {
            $results['did_you_mean'] = $didyoumean->textContent;
        }
        $search_specific = $xpath->query(".//section[@class='dym-c']/section/h5/a")->item(0);
        if(!is_null($search_specific)) {
            $results['search_specific'] = $search_specific->textContent;
        }

        // Scrape the results
        $scrape = $xpath->query("//li[contains(@class, 'ld') and not(contains(@class, 'slotting'))][position() < 101]");
        // Rank counts down from the number of scraped nodes; only results that
        // are actually added consume a rank.
        $rank = $results['amount'] = count($scrape);
        // Direct links that were already added, used to filter duplicates
        $seen_links = array();

        foreach($scrape as $result) {
            $image = $xpath->evaluate(".//img/@src", $result)->item(0);
            if($image === null) continue;

            $href = $xpath->evaluate(".//a/@href", $result)->item(0);
            if($href === null) continue;

            // Get meta data
            // -- Relevant $url_data (there is more, but unused by Goosle)
            // w = Image width (1280)
            // h = Image height (720)
            // imgurl = Actual full size image (Used in Yahoo preview/popup)
            // rurl = Url to page where the image is used
            // size = Image size (413.1KB)
            // tt = Website title (Used for image alt text)
            $url_data = array();
            parse_str($href->textContent, $url_data);

            // Deal with optional or missing data
            $dimensions_w = $this->escaped_field($url_data, 'w');
            $dimensions_h = $this->escaped_field($url_data, 'h');
            $filesize = $this->escaped_field($url_data, 'size');
            $link = $this->escaped_field($url_data, 'imgurl');
            // Prefixed with "//" as in the original code - presumably because
            // imgurl comes without a scheme; verify against a live response.
            if($link !== "") $link = "//".$link;

            // Process result
            $image = htmlspecialchars($image->textContent);
            $url = $this->escaped_field($url_data, 'rurl');
            $alt = $this->escaped_field($url_data, 'tt');

            // filter duplicate urls/results
            if(isset($seen_links[$link])) continue;
            $seen_links[$link] = true;

            $id = uniqid(rand(0, 9999));

            $results['search'][] = array ("id" => $id, "source" => "Yahoo! Images", "image" => $image, "alt" => $alt, "url" => $url, "width" => $dimensions_w, "height" => $dimensions_h, "filesize" => $filesize, "direct_link" => $link, "engine_rank" => $rank);
            $rank -= 1;
        }

        // Add error if there are no search results
        if(empty($results['search'])) {
            $results['error'] = array(
                "message" => "No results found. Please try with less or different keywords!"
            );
        }

        return $results;
    }

    /**
     * Fetch one value from parsed url data, escaped for HTML output.
     *
     * @param array $data Array as filled by parse_str().
     * @param string $key Key to look up.
     * @return string Escaped value, or "" when the key is missing, empty() or not
     *                a string (parse_str() produces arrays for "key[]=" input).
     */
    private function escaped_field(array $data, string $key): string {
        if(empty($data[$key]) || !is_string($data[$key])) {
            return "";
        }

        return htmlspecialchars($data[$key]);
    }
}
?> |
Oops, something went wrong.