Archived
1
0
Fork 0

Парсер QSP

This commit is contained in:
Alexander Yakovlev 2018-03-29 14:42:44 +07:00
parent 439f66380c
commit d931fd568b
10 changed files with 52 additions and 36 deletions

View file

@ -3,6 +3,7 @@ namespace Source;
use \Symfony\Component\DomCrawler\Crawler; use \Symfony\Component\DomCrawler\Crawler;
use \Game; use \Game;
use \GuzzleHttp\Client as GuzzleClient;
abstract class Source { abstract class Source {
public $title; public $title;
@ -52,17 +53,14 @@ abstract class Source {
* *
* @return string * @return string
*/ */
protected function get_text($url) { public function get_text($url) {
$curl = curl_init(); $client = new GuzzleClient([
curl_setopt_array($curl, array( 'timeout' => 30,
CURLOPT_RETURNTRANSFER => 1, ]);
CURLOPT_URL => $url, $response = $client->request('GET', $url, [
CURLOPT_CONNECTTIMEOUT => 30, 'cookies' => $this->cookies,
CURLOPT_COOKIE => $this->cookies, ]);
)); return (string) $response->getBody();
$resp = curl_exec($curl);
curl_close($curl);
return $resp;
} }
/** /**

View file

@ -20,9 +20,6 @@ class Anivisual extends Source {
'Декабря' => 'December', 'Декабря' => 'December',
]; ];
protected function parse() { protected function parse() {
$text = $this->get_text('http://anivisual.net/stuff/1');
$this->loadStr($text);
unset($text);
$this->dom->filter('.entryBlock')->each(function($gameBlock) { $this->dom->filter('.entryBlock')->each(function($gameBlock) {
$date = trim($gameBlock->filter('.icon-calendar')->text()); $date = trim($gameBlock->filter('.icon-calendar')->text());
foreach ($this->months as $ruM => $enM) { foreach ($this->months as $ruM => $enM) {

View file

@ -38,8 +38,6 @@ class Apero extends Source {
return (strpos($url,'http://apero.ru/') !== FALSE); return (strpos($url,'http://apero.ru/') !== FALSE);
} }
public function page($url) { public function page($url) {
$text = $this->get_text($url);
$this->loadStr($text);
$game = new Game; $game = new Game;
$game->url = $url; $game->url = $url;
$game->platform = 'Аперо'; $game->platform = 'Аперо';

View file

@ -52,8 +52,6 @@ class Hyperbook extends Source {
return (strpos($url,$this->rootUrl.'/comments.php') !== FALSE); return (strpos($url,$this->rootUrl.'/comments.php') !== FALSE);
} }
public function page($url) { public function page($url) {
$text = $this->get_text($url);
$this->loadStr($text);
$game = new Game; $game = new Game;
$game->url = $url; $game->url = $url;
$game->platform = 'AXMA Story Maker'; $game->platform = 'AXMA Story Maker';

View file

@ -34,7 +34,6 @@ class Instead extends Source {
return (strpos($url,'http://instead-games.ru/game.php') !== FALSE); return (strpos($url,'http://instead-games.ru/game.php') !== FALSE);
} }
public function page($url) { public function page($url) {
echo 'Распознана игра INSTEAD.'.PHP_EOL;
$text = $this->get_text($url); $text = $this->get_text($url);
$this->loadStr($text); $this->loadStr($text);
unset($text); unset($text);

View file

@ -23,4 +23,28 @@ class Qsp extends Source {
$this->output .= $game->print(); $this->output .= $game->print();
}); });
} }
/**
 * Tells whether this source is responsible for the given URL.
 *
 * The URL is accepted when it contains the substring "http://qsp.su"
 * at any position.
 *
 * @param string $url address to test
 *
 * @return bool TRUE when the URL contains the QSP site address
 */
public function checkPage($url) {
    $position = strpos($url, 'http://qsp.su');
    if ($position === FALSE) {
        return FALSE;
    }
    return TRUE;
}
/**
 * Builds a Game object from a QSP catalogue page.
 *
 * All fields are read from $this->dom; this method does not fetch or load
 * the page itself, so the markup has to be loaded before it is called.
 * If any field cannot be parsed, an error message, the exception message
 * and a stack trace are printed and the script terminates.
 *
 * @param string $url address of the game page; stored as the game URL
 *
 * @return Game the populated game record
 */
public function page($url) {
    $game = new Game;
    $game->url = $url;
    try {
        $game->author = trim($this->dom->filter('#sobi2Details_field_author')->text());
        // Remove the label text from the author value.
        // NOTE(review): the label id uses the "sobi2Listing" prefix while the
        // other ids use "sobi2Details" — confirm against the page markup.
        $game->author = trim(str_replace($this->dom->filter('#sobi2Listing_field_author_label')->text(), '', $game->author));
        $game->title = trim($this->dom->filter('.sobi2Details h1')->first()->text());
        $game->description = trim($this->dom->filter('#sobi2Details_field_description')->text());
        $game->platform = 'QSP';
        $game->url_download = trim($this->dom->filter('h2 a')->attr('href'));
        $game->url_download_description = 'Архив для интерпретатора QSP';
        $game->image = trim($this->dom->filter('.sobi2DetailsImage')->first()->attr('src'));
        $footer = $this->dom->filter('.sobi2DetailsFooter tr:first-child td')->text();
        // Without this check a footer lacking a dd.mm.yyyy date would leave
        // $matches empty, and new \DateTime(null) would silently produce the
        // current time instead of the game's date.
        if (preg_match('/\d?\d\.\d?\d\.\d{4}/', $footer, $matches) !== 1) {
            throw new \RuntimeException('Date not found in the page footer.');
        }
        $game->date = new \DateTime($matches[0]);
    } catch (\Exception $e) {
        echo 'Ошибка парсинга. Проверьте URL.'.PHP_EOL;
        // The trace alone does not include the reason, so print it as well.
        echo $e->getMessage().PHP_EOL;
        echo $e->getTraceAsString();
        die();
    }
    return $game;
}
} }

View file

@ -48,28 +48,25 @@ class Steam extends Source {
} }
public function page($url) { public function page($url) {
$this->cookies = 'mature_content=1; Steam_Language=russian'; $this->cookies = 'mature_content=1; Steam_Language=russian';
$text = $this->get_text($url);
$game_page = new Crawler($text);
unset($text);
$game = new Game; $game = new Game;
$game->url = $url; $game->url = $url;
$game->title = trim($game_page->filter('div.apphub_AppName')->first()->text()); $game->title = trim($this->dom->filter('div.apphub_AppName')->first()->text());
$game->description = trim($game_page->filter('div.game_description_snippet')->first()->text()); $game->description = trim($this->dom->filter('div.game_description_snippet')->first()->text());
$game->author = trim($game_page->filter('div#developers_list')->first()->text()); $game->author = trim($this->dom->filter('div#developers_list')->first()->text());
if (strpos($game->author, ',') !== FALSE) { if (strpos($game->author, ',') !== FALSE) {
$game->author = explode(',', $game->author); $game->author = explode(',', $game->author);
$game->author = array_map('trim', $game->author); $game->author = array_map('trim', $game->author);
} }
$game->image = $game_page->filter('img.game_header_image_full')->first()->attr('src'); $game->image = $this->dom->filter('img.game_header_image_full')->first()->attr('src');
$game->categories = 'Коммерческая ИЛ'; $game->categories = 'Коммерческая ИЛ';
$languages = $game_page->filter('.game_language_options tr td:first-child'); $languages = $this->dom->filter('.game_language_options tr td:first-child');
$game->language = []; $game->language = [];
foreach ($languages as $language) { foreach ($languages as $language) {
$game->language[] = trim($language->nodeValue); $game->language[] = trim($language->nodeValue);
} }
$game->language = implode(', ', $game->language); $game->language = implode(', ', $game->language);
$date = $game_page->filter('div.date')->first()->text(); $date = $this->dom->filter('div.date')->first()->text();
$game->date = \DateTime::createFromFormat('d M, Y', $date); $game->date = \DateTime::createFromFormat('d M, Y', $date);
if ($game->date === FALSE) { // если Steam отдал страницу на русском if ($game->date === FALSE) { // если Steam отдал страницу на русском
foreach ($this->months as $ruM => $enM) { foreach ($this->months as $ruM => $enM) {

View file

@ -22,9 +22,6 @@ class Urq extends Source {
return (strpos($url,'http://urq.plut.info/node/') !== FALSE); return (strpos($url,'http://urq.plut.info/node/') !== FALSE);
} }
public function page($url) { public function page($url) {
$text = $this->get_text($url);
$this->loadStr($text, []);
unset($text);
$game = new Game; $game = new Game;
$game->url = $url; $game->url = $url;
$gameBlock = $this->dom->filter('.content'); $gameBlock = $this->dom->filter('.content');

View file

@ -110,21 +110,27 @@ class Wikipage {
$this->content .= "\n== Версии =="; $this->content .= "\n== Версии ==";
} }
$this->txtadd('url_online', "\n* [#{@game.url_online} #{@game.url_online_description}]"); $this->txtadd('url_online', "\n* [#{@game.url_online} #{@game.url_online_description}]");
if (!empty($this->game->url_download) && !empty($this->game->url_download_description)) { if (!empty($this->game->url_download)) {
$this->content .= "\n* [$this->game->url_download $this->game->url_download_description]"; if (!empty($this->game->url_download_description)) {
$this->content .= PHP_EOL.'* ['.$this->game->url_download.' '.$this->game->url_download_description.']';
} else {
$this->content .= "\n* [#{$this->game->url_download} Скачать игру]";
}
} }
if (!empty($this->game->url_discussion) || !empty($this->game->url)) { if (!empty($this->game->url_discussion) || !empty($this->game->url)) {
$this->content .= "\n== Ссылки =="; $this->content .= "\n== Ссылки ==";
} }
$this->txtadd('url_discussion', '* ['.$this->game->url_discussion.' Обсуждение игры]'); $this->txtadd('url_discussion', '* ['.$this->game->url_discussion.' Обсуждение игры]');
$this->txtadd('url', '* ['.$this->game->url.' Страница игры]'); $this->txtadd('url', '* ['.$this->game->url.' Страница игры]');
if (is_array($this->game->categories)) { if (!empty($this->game->categories) && is_array($this->game->categories)) {
$this->content .= PHP_EOL; $this->content .= PHP_EOL;
foreach ($this->game->categories as $category) { foreach ($this->game->categories as $category) {
$this->content .= '[[Категория:'.$category.']]'; $this->content .= '[[Категория:'.$category.']]';
} }
} else { } else {
$this->content .= PHP_EOL.'[[Категория:'.$this->game->categories.']]'; if (!empty($this->game->categories)) {
$this->content .= PHP_EOL.'[[Категория:'.$this->game->categories.']]';
}
} }
} }

View file

@ -13,7 +13,7 @@ if (!isset($argv[1])) {
echo 'Please provide the direct URL to the game page.'.PHP_EOL; echo 'Please provide the direct URL to the game page.'.PHP_EOL;
die(); die();
} }
$url = strtolower($argv[1]); $url = $argv[1];
$game = new Game; $game = new Game;
function check($classname) { function check($classname) {
@ -23,6 +23,8 @@ function check($classname) {
$cname = 'Source\\'.$classname; $cname = 'Source\\'.$classname;
$cl = (new $cname()); $cl = (new $cname());
if ($cl->checkPage($url)) { if ($cl->checkPage($url)) {
echo 'Используется правило "'.$cl->title.'".'.PHP_EOL;
$cl->loadStr($cl->get_text($url));
$game = $cl->page($url); $game = $cl->page($url);
if ($game) { if ($game) {
$page = new Wikipage($game); $page = new Wikipage($game);