diff --git a/app/Commands/Collect.php b/app/Commands/Collect.php index f62a200..aa8766c 100644 --- a/app/Commands/Collect.php +++ b/app/Commands/Collect.php @@ -13,7 +13,7 @@ class Collect extends Command * * @var string */ - protected $signature = 'collect'; + protected $signature = 'collect {keyword?}'; /** * The description of the command. @@ -28,10 +28,10 @@ class Collect extends Command * @var array */ protected $parsers = [ - 'Anivisual', + //'Anivisual', + 'Hyperbook', /* 'Apero', - 'Hyperbook', 'Questbook', 'Textadventures', 'IFDB', @@ -55,7 +55,7 @@ class Collect extends Command */ public function handle() { - Log::debug('Collecting.'); + Log::debug('Collecting.'); foreach ($this->parsers as $parser) { $cname = 'App\\Sources\\'.$parser; Log::debug($parser); diff --git a/app/Sources/Anivisual.php b/app/Sources/Anivisual.php index 728a370..bbe769f 100644 --- a/app/Sources/Anivisual.php +++ b/app/Sources/Anivisual.php @@ -48,18 +48,7 @@ class Anivisual extends Source { $text = $this->get_text('http://anivisual.net/stuff/1'); $this->loadStr($text); unset($text); - $lastDate = $this->getLastDate(); - $this->dom->filter('.entryBlock')->each(function($gameBlock) use($lastDate) { - // Check that the game date is after the date of the last parsed game - $date = trim($gameBlock->filter('.icon-calendar')->text()); - foreach ($this->months as $ruM => $enM) { - $date = str_replace($ruM, $enM, $date); - } - $date = \DateTime::createFromFormat('d F Y', $date); - if (!empty($lastDate) && $date >= $lastDate) { - return; - } - + $this->dom->filter('.entryBlock')->each(function($gameBlock) { // Get the game link $link = $gameBlock->filter('.novel-ttl a')->first(); $link = 'http://anivisual.net'.$link->attr('href'); diff --git a/app/Sources/Apero.php b/app/Sources/Apero.php index 60466d1..84dc0c3 100644 --- a/app/Sources/Apero.php +++ b/app/Sources/Apero.php @@ -19,14 +19,20 @@ namespace App\Sources; use \App\Models\Game; +use \App\Models\Platform; +use \App\Models\Language; +use \App\Models\Author; +use \App\Models\Tag; use \App\Source; +use Log; /** * Парсер для Apero.ru * Проблема парсера в том, что на Аперо часто поломана кодировка UTF-8. */ class Apero extends Source { - public $title = "Apero"; + public $title = "Apero"; + public $keyword = 'apero'; public function parse() { $text = $this->get_text('http://apero.ru/Текстовые-игры/Песочница', [ @@ -46,41 +52,50 @@ class Apero extends Source { public function parseIndex() { $this->dom->filter('.tabled-game-block')->each(function($gameBlock){ - $formatter = new \IntlDateFormatter( 'ru', \IntlDateFormatter::LONG, \IntlDateFormatter::NONE ); - $date = trim($gameBlock->filter('.game-updated-block')->text(), "() \t\n\r\0\x0B"); - $date = str_replace('вчера', date('d.m.Y', strtotime('-1 day')), $date); - $date = $formatter->parse($date); - // TODO last date from last game from apero - $game = new Game; - $game->author = trim($gameBlock->filter('.game-author-block:first-child a')->text()); - $game->title = trim($gameBlock->filter('h2 a')->first()->text()); - $game->url = trim($gameBlock->filter('h2 a')->first()->attr('href')); - $game->description = trim($gameBlock->filter('.game-desc-block')->first()->text()); - $this->saveGame($game); + $url = trim($gameBlock->filter('h2 a')->first()->attr('href')); + $this->page($url); }); } + public function checkPage($url) { return (strpos($url,'http://apero.ru/') !== FALSE); } + public function page($url) { $game = new Game; $game->url = $url; - $game->platform = 'Аперо'; - $game->title = $this->dom->filter('dd')->reduce(function($block) { + if ($this->dom->filter('#printer')->count() > 0) { + $game->source_id = (int) $this->dom->filter('#printer')->first()->attr('data-game-id'); + } else { + $id = $this->dom->filter('dd')->reduce(function($block) { + if ($block->attr('style') === 'color: #cccccc;') { + return true; + } + return false; + }); + if ($id->count() > 0) { + $game->source_id = (int) $id->text(); + } + } + if (empty($game->source_id)) { + throw new \Exception('no id'); + } + + $game = $this->findGame($game); + if ($game->isClean()) { + return; + } + + $title = $this->dom->filter('dd')->reduce(function($block) { if ($block->attr('itemprop') === 'name') { return true; } return false; - })->text(); - $game->author = []; - $this->dom->filter('dd a')->reduce(function($block){ - if ($block->attr('itemprop') === 'author') { - return true; - } - return false; - })->each(function($block) use($game){ - $game->author[] = $block->text(); }); + if ($title->count() > 0) { + $game->title = $title->text(); + } + $date = $this->dom->filter('meta')->reduce(function($block){ if ($block->attr('itemprop') === 'datePublished') { return true; @@ -92,8 +107,65 @@ class Apero extends Source { } else { $date = NULL; } - $game->date = \DateTime::createFromFormat('Y-M-d', $date); + if (!empty($date)) { + $game->date = \DateTime::createFromFormat('Y-M-d', $date); + } // TODO description - return $game; + $game->save(); + + $this->dom->filter('dd a')->reduce(function($block){ + if ($block->attr('itemprop') === 'author') { + return true; + } + return false; + })->each(function($block) use($game){ + $author_name = $block->text(); + $author_url = $block->attr('href'); + + $author_model = Author::findByName($author_name); + if (empty($author_model)) { + $author_model = new Author(); + $author_model->name = $author_name; + $author_model->url = $author_url; + $author_model->save(); + } + if (!$game->authors()->where('name', $author_name)->exists()) { + $game->authors()->attach($author_model); + } + }); + + $language = Language::findByCode('ru'); + if (!$game->languages()->where('code', 'ru')->exists()) { + $game->languages()->attach($language); + } + + $model = Platform::where('title', 'Аперо')->first(); + if (!$model) { + $model = new Platform(); + $model->title = 'Аперо'; + $model->save(); + } + $game->platforms()->attach($model); + + $this->dom->filter('dd')->reduce(function($block){ + if ($block->attr('itemprop') === 'genre') { + return true; + } + return false; + })->each(function($block) use($game, $language){ + $genre = trim($block->text()); + $model = Tag::where('language_id', $language->id) + ->where('title', $genre) + ->first(); + if (!$model) { + $model = new Tag(); + $model->language_id = $language->id; + $model->title = $genre; + $model->save(); + } + $game->tags()->attach($model); + }); + + Log::info($game->title); } } diff --git a/app/Sources/Hyperbook.php b/app/Sources/Hyperbook.php index bf5b95e..29d3909 100644 --- a/app/Sources/Hyperbook.php +++ b/app/Sources/Hyperbook.php @@ -19,74 +19,107 @@ namespace App\Sources; use \App\Models\Game; +use \App\Models\Platform; +use \App\Models\Language; +use \App\Models\Author; +use \App\Models\Tag; use \App\Source; +use Log; class Hyperbook extends Source { public $title = "Гиперкнига"; - protected $games = array(); + protected $platform = 'AXMA Story Maker'; + protected $platform_model; + protected $language_model; protected $rootUrl = 'http://hyperbook.ru'; public function parse() { $text = $this->get_text($this->rootUrl.'/lib.php?sort=time'); $this->loadStr($text); unset($text); - $this->dom->filter("#listPubs h3 a")->each(function($link) { - $game = new Game; - $game->title = $link->text(); - $game->url = $link->attr('href'); - $game->url = str_replace('file', $this->rootUrl.'/comments.php?id=', $game->url); - $this->games[] = $game; - }); - $this->dom->filter("#listPubs div")->reduce(function($node) { - if ($node->attr('style') === 'text-align:left;margin-bottom:4px;') - return true; - return false; - })->each(function($author, $i) { - $this->games[$i]->author = $author->text(); - }); - $this->dom->filter("#listPubs div")->reduce(function($node) { - if ($node->attr('style') === 'float: left; width: 20%; text-align:right;') - return true; - return false; - })->each(function($date, $i){ - $this->games[$i]->date = $date->text(); - }); - $this->dom->filter("#listPubs div")->reduce(function($node) { - if ($node->attr('style') === NULL) - return true; - return false; - })->each(function($dsc, $i){ - $this->games[$i]->description = $dsc->text(); - }); - foreach ($this->games as $game) { - $this->saveGame($game); + $model = Platform::where('title', $this->platform)->first(); + if (!$model) { + $model = new Platform(); + $model->title = $this->platform; + $model->save(); } + $this->platform_model = $model; + + $this->language_model = Language::findByCode('ru'); + + $this->dom->filter("#listPubs h3 a")->each(function($link) { + $id = $link->attr('href'); + $id = (int) str_replace('file', '', $id); + $url = $this->rootUrl.'/comments.php?id='.$id; + $this->page($url, $id); + }); } public function checkPage($url) { return (strpos($url,$this->rootUrl.'/comments.php') !== FALSE); } - public function page($url) { + public function page($url, $id) { $game = new Game; $game->url = $url; - $game->platform = 'AXMA Story Maker'; + $game->source_id = $id; + + $game = $this->findGame($game); + if ($game->isClean()) { + return; + } + $game->title = $this->dom->filter(".content h1")->first()->text(); $game->title = trim(str_replace($this->dom->filter("h1 span")->first()->text(), '', $game->title)); - $game->author = $this->dom->filter(".content > div")->reduce(function($node) { + + $date = $this->dom->filter(".content div.small")->reduce(function($node) { + if ($node->attr('style') === 'float: left; width: 20%; text-align:right;') + return true; + return false; + }); + if ($date->count() > 0){ + $date = $date->first()->text(); + if (!empty($date)) { + $game->date = \DateTime::createFromFormat('d.m.y', $date); + } + } + + $description = $this->dom->filter(".content div.small")->reduce(function($node) { + if ($node->attr('style') === NULL) + return true; + return false; + }); + if ($description->count() > 0) { + $game->description = $description->first()->text(); + } + $game->save(); + + $author = $this->dom->filter(".content > div")->reduce(function($node) { if ($node->attr('style') === 'float: left; width: 50%; margin-bottom:14px; text-align: left;') { return true; } return false; - })->first()->text(); - $game->date = $this->dom->filter(".content div.small")->reduce(function($node) { - if ($node->attr('style') === 'float: left; width: 20%; text-align:right;') - return true; - return false; - })->first()->text(); - $game->description = $this->dom->filter(".content div.small")->reduce(function($node) { - if ($node->attr('style') === NULL) - return true; - return false; - })->first()->text(); - $game->date = \DateTime::createFromFormat('d.m.y', $game->date); - return $game; + }); + if ($author->count() > 0) { + $author = $author->first()->text(); + } else { + $author = ''; + } + + if (!empty($author)) { + $author_model = Author::findByName($author); + if (empty($author_model)) { + $author_model = new Author(); + $author_model->name = $author; + $author_model->save(); + } + if (!$game->authors()->where('name', $author)->exists()) { + $game->authors()->attach($author_model); + } + } + + $game->platforms()->attach($this->platform_model); + if (!$game->languages()->where('code', 'ru')->exists()) { + $game->languages()->attach($this->language_model); + } + + Log::info($game->title); } }