From 95462b993bc52abbd74fce7c023972b32a7a8c99 Mon Sep 17 00:00:00 2001 From: Alexander Yakovlev Date: Sun, 5 Apr 2020 14:41:54 +0700 Subject: [PATCH] Steam scraper --- app/Commands/Collect.php | 4 +- app/Downloader.php | 2 +- app/Source.php | 2 +- app/Sources/Steam.php | 106 +++++++++++++++++++++------------------ 4 files changed, 61 insertions(+), 53 deletions(-) diff --git a/app/Commands/Collect.php b/app/Commands/Collect.php index 48f78ec..e33061d 100644 --- a/app/Commands/Collect.php +++ b/app/Commands/Collect.php @@ -36,9 +36,9 @@ class Collect extends Command //'Axma', //'IFDB', //'Itch', - 'Instead', - /* + //'Instead', 'Steam', + /* 'Urq', 'Kvester', 'Instory', diff --git a/app/Downloader.php b/app/Downloader.php index f6a4509..9481ecb 100644 --- a/app/Downloader.php +++ b/app/Downloader.php @@ -50,7 +50,7 @@ class Downloader { $resp = (string) $response->getBody(); Cache::put($url, $resp); return $resp; - } + } public function download($url, $outFile) { $options = array( diff --git a/app/Source.php b/app/Source.php index 8459d7d..653e63f 100644 --- a/app/Source.php +++ b/app/Source.php @@ -66,7 +66,7 @@ abstract class Source { } public function set_cookies($cookies) { - return $this->downloader->set_cookies($cookies); + return $this->downloader->setCookies($cookies); } /** diff --git a/app/Sources/Steam.php b/app/Sources/Steam.php index 10429ac..b0475a7 100644 --- a/app/Sources/Steam.php +++ b/app/Sources/Steam.php @@ -19,13 +19,19 @@ namespace App\Sources; use \App\Models\Game; -use \App\Source; use \Symfony\Component\DomCrawler\Crawler; use \GuzzleHttp\Cookie\CookieJar; use \GuzzleHttp\Cookie\SetCookie; +use \App\Models\Platform; +use \App\Models\Language; +use \App\Models\Author; +use \App\Models\Tag; +use \App\Source; +use Log; class Steam extends Source { public $title = "Steam"; + public $keyword = 'steam'; protected $games = []; public $delayedLoad = true; protected $months = [ @@ -56,30 +62,13 @@ class Steam extends Source { $this->dom->filter('#search_result_container a.search_result_row')->each(function($gameLink){ $url = $gameLink->attr('href'); $url = substr($url,0,strpos($url, '?')); // remove query string - $game = $this->page($url); - if ($game) { - if ($game->date) { - $date = $game->date->format('U'); - if ($date < $this->period) return; - } - $this->games[] = $game->print(); - } + $this->page($url); }); } public function parse() { - global $argv; - if (isset($argv[2])) { - $game = $this->page($argv[2]); - $this->output .= $game->print(); - } else { - $this->parse_tag("text-based"); - $this->parse_tag("interactive fiction"); - $this->parse_tag("visual novel"); - $this->games = array_unique($this->games); - foreach ($this->games as $game) { - $this->output .= $game; - } - } + $this->parse_tag("text-based"); + $this->parse_tag("interactive fiction"); + $this->parse_tag("visual novel"); } public function checkPage($url) { return (strpos($url,'store.steampowered.com/') !== FALSE); @@ -107,51 +96,70 @@ class Steam extends Source { $game = new Game; $game->url = $url; - $comingsoon = $this->dom->filter('div.game_area_comingsoon')->first(); - if ($comingsoon->count() > 0) { - // we are skipping preorders and coming soon games - return false; - } $title = $this->dom->filter('div.apphub_AppName')->first(); if ($title->count() > 0) { $game->title = trim($title->text()); } + + $game = $this->findGame($game); + $dsc = $this->dom->filter('div.game_description_snippet')->first(); if ($dsc->count() > 0) { $game->description = trim($dsc->text()); } - $author = $this->dom->filter('div#developers_list')->first(); - if ($author->count() > 0) { - $game->author = trim($author->text()); - if (strpos($game->author, ',') !== FALSE) { - $game->author = explode(',', $game->author); - $game->author = array_map('trim', $game->author); - } - } + $image = $this->dom->filter('img.game_header_image_full')->first(); if ($image->count() > 0) { - $game->image = $image->attr('src'); - } - $game->categories = 'Коммерческая ИЛ'; - $languages = $this->dom->filter('.game_language_options tr td:first-child'); - if ($languages->count() > 0) { - $game->language = []; - foreach ($languages as $language) { - $game->language[] = trim($language->nodeValue); - } - $game->language = implode(', ', $game->language); + $game->image_url = $image->attr('src'); } + $date = $this->dom->filter('div.date')->first(); if ($date->count() > 0) { $date = $date->text(); - $game->date = \DateTime::createFromFormat('d M, Y', $date); - if ($game->date === FALSE) { // если Steam отдал страницу на русском + $game->release_date = \DateTime::createFromFormat('d M, Y', $date); + if ($game->release_date === FALSE) { // если Steam отдал страницу на русском foreach ($this->months as $ruM => $enM) { $date = str_replace($ruM, $enM, $date); } - $game->date = \DateTime::createFromFormat('d F Y', $date); + $game->release_date = \DateTime::createFromFormat('d F Y', $date); + } + if (empty($game->release_date)) { + $game->release_date = NULL; + } + } + $game->save(); + + $languages = $this->dom->filter('.game_language_options tr td:first-child'); + if ($languages->count() > 0) { + $language_codes = []; + foreach ($languages as $language) { + $language_codes[] = trim($language->nodeValue); + } + foreach ($language_codes as $langCode) { + $language_model = Language::findByCode($langCode); + $game->languages()->attach($language_model); + } + } + + $author = $this->dom->filter('div#developers_list')->first(); + $authors = []; + if ($author->count() > 0) { + $authors = [trim($author->text())]; + if (strpos($authors[0], ',') !== FALSE) { + $authors = explode(',', $authors[0]); + $authors = array_map('trim', $authors); + } + } + foreach ($authors as $author) { + $author_model = Author::findByName($author); + if (empty($author_model)) { + $author_model = new Author(); + $author_model->name = $author; + $author_model->save(); + } + if (!$game->authors()->where('name', $author)->exists()) { + $game->authors()->attach($author_model); } } - return $game; } }