Archived
1
0
Fork 0

Парсер QSP

This commit is contained in:
Alexander Yakovlev 2018-03-29 14:42:44 +07:00
parent 439f66380c
commit d931fd568b
10 changed files with 52 additions and 36 deletions

View file

@ -3,6 +3,7 @@ namespace Source;
use \Symfony\Component\DomCrawler\Crawler;
use \Game;
use \GuzzleHttp\Client as GuzzleClient;
abstract class Source {
public $title;
@ -52,17 +53,14 @@ abstract class Source {
*
* @return string
*/
protected function get_text($url) {
$curl = curl_init();
curl_setopt_array($curl, array(
CURLOPT_RETURNTRANSFER => 1,
CURLOPT_URL => $url,
CURLOPT_CONNECTTIMEOUT => 30,
CURLOPT_COOKIE => $this->cookies,
));
$resp = curl_exec($curl);
curl_close($curl);
return $resp;
/**
 * Downloads a page with an HTTP GET request and returns the response body.
 *
 * @param string $url address of the page to download
 *
 * @return string response body
 */
public function get_text($url) {
    $client = new GuzzleClient([
        'timeout' => 30,
    ]);
    $options = [];
    if (is_string($this->cookies)) {
        // Guzzle's 'cookies' request option expects a cookie jar object and
        // rejects a plain string, so a raw "name=value; ..." string is sent
        // as a Cookie header instead.
        if ($this->cookies !== '') {
            $options['headers'] = ['Cookie' => $this->cookies];
        }
    } elseif (!empty($this->cookies)) {
        // Anything that is not a string is handed to Guzzle unchanged.
        $options['cookies'] = $this->cookies;
    }
    $response = $client->request('GET', $url, $options);
    return (string) $response->getBody();
}
/**

View file

@ -20,9 +20,6 @@ class Anivisual extends Source {
'Декабря' => 'December',
];
protected function parse() {
$text = $this->get_text('http://anivisual.net/stuff/1');
$this->loadStr($text);
unset($text);
$this->dom->filter('.entryBlock')->each(function($gameBlock) {
$date = trim($gameBlock->filter('.icon-calendar')->text());
foreach ($this->months as $ruM => $enM) {

View file

@ -38,8 +38,6 @@ class Apero extends Source {
return (strpos($url,'http://apero.ru/') !== FALSE);
}
public function page($url) {
$text = $this->get_text($url);
$this->loadStr($text);
$game = new Game;
$game->url = $url;
$game->platform = 'Аперо';

View file

@ -52,8 +52,6 @@ class Hyperbook extends Source {
return (strpos($url,$this->rootUrl.'/comments.php') !== FALSE);
}
public function page($url) {
$text = $this->get_text($url);
$this->loadStr($text);
$game = new Game;
$game->url = $url;
$game->platform = 'AXMA Story Maker';

View file

@ -34,7 +34,6 @@ class Instead extends Source {
return (strpos($url,'http://instead-games.ru/game.php') !== FALSE);
}
public function page($url) {
echo 'Распознана игра INSTEAD.'.PHP_EOL;
$text = $this->get_text($url);
$this->loadStr($text);
unset($text);

View file

@ -23,4 +23,28 @@ class Qsp extends Source {
$this->output .= $game->print();
});
}
/**
 * Tells whether the given address belongs to this source.
 *
 * @param string $url address of a game page
 *
 * @return bool TRUE when the address contains 'http://qsp.su'
 */
public function checkPage($url) {
    $position = strpos($url, 'http://qsp.su');
    if ($position === FALSE) {
        return FALSE;
    }
    return TRUE;
}
/**
 * Builds a Game from the QSP catalogue page held in $this->dom.
 *
 * If parsing throws (including when no release date is found), an error
 * message and the stack trace are printed and the script terminates.
 *
 * @param string $url address of the game page, stored as the game URL
 *
 * @return Game
 */
public function page($url) {
    $game = new Game;
    $game->url = $url;
    try {
        $game->author = trim($this->dom->filter('#sobi2Details_field_author')->text());
        // The author field text includes its label; strip the label out.
        $authorLabel = $this->dom->filter('#sobi2Listing_field_author_label')->text();
        $game->author = trim(str_replace($authorLabel, '', $game->author));
        $game->title = trim($this->dom->filter('.sobi2Details h1')->first()->text());
        $game->description = trim($this->dom->filter('#sobi2Details_field_description')->text());
        $game->platform = 'QSP';
        $game->url_download = trim($this->dom->filter('h2 a')->attr('href'));
        $game->url_download_description = 'Архив для интерпретатора QSP';
        $game->image = trim($this->dom->filter('.sobi2DetailsImage')->first()->attr('src'));
        $footer = $this->dom->filter('.sobi2DetailsFooter tr:first-child td')->text();
        // Without this check an unmatched footer left $matches[0] undefined,
        // and DateTime silently fell back to the current time.
        if (preg_match('/\d?\d\.\d?\d\.\d{4}/', $footer, $matches) !== 1) {
            throw new \UnexpectedValueException('No release date found in the page footer.');
        }
        $game->date = new \DateTime($matches[0]);
    } catch (\Exception $e) {
        echo 'Ошибка парсинга. Проверьте URL.'.PHP_EOL;
        echo $e->getTraceAsString();
        die();
    }
    return $game;
}
}

View file

@ -48,28 +48,25 @@ class Steam extends Source {
}
public function page($url) {
$this->cookies = 'mature_content=1; Steam_Language=russian';
$text = $this->get_text($url);
$game_page = new Crawler($text);
unset($text);
$game = new Game;
$game->url = $url;
$game->title = trim($game_page->filter('div.apphub_AppName')->first()->text());
$game->description = trim($game_page->filter('div.game_description_snippet')->first()->text());
$game->author = trim($game_page->filter('div#developers_list')->first()->text());
$game->title = trim($this->dom->filter('div.apphub_AppName')->first()->text());
$game->description = trim($this->dom->filter('div.game_description_snippet')->first()->text());
$game->author = trim($this->dom->filter('div#developers_list')->first()->text());
if (strpos($game->author, ',') !== FALSE) {
$game->author = explode(',', $game->author);
$game->author = array_map('trim', $game->author);
}
$game->image = $game_page->filter('img.game_header_image_full')->first()->attr('src');
$game->image = $this->dom->filter('img.game_header_image_full')->first()->attr('src');
$game->categories = 'Коммерческая ИЛ';
$languages = $game_page->filter('.game_language_options tr td:first-child');
$languages = $this->dom->filter('.game_language_options tr td:first-child');
$game->language = [];
foreach ($languages as $language) {
$game->language[] = trim($language->nodeValue);
}
$game->language = implode(', ', $game->language);
$date = $game_page->filter('div.date')->first()->text();
$date = $this->dom->filter('div.date')->first()->text();
$game->date = \DateTime::createFromFormat('d M, Y', $date);
if ($game->date === FALSE) { // если Steam отдал страницу на русском
foreach ($this->months as $ruM => $enM) {

View file

@ -22,9 +22,6 @@ class Urq extends Source {
return (strpos($url,'http://urq.plut.info/node/') !== FALSE);
}
public function page($url) {
$text = $this->get_text($url);
$this->loadStr($text, []);
unset($text);
$game = new Game;
$game->url = $url;
$gameBlock = $this->dom->filter('.content');

View file

@ -110,21 +110,27 @@ class Wikipage {
$this->content .= "\n== Версии ==";
}
$this->txtadd('url_online', "\n* [#{@game.url_online} #{@game.url_online_description}]");
if (!empty($this->game->url_download) && !empty($this->game->url_download_description)) {
$this->content .= "\n* [$this->game->url_download $this->game->url_download_description]";
// Download link: use the game's own description as the caption when it is
// set, otherwise fall back to a generic caption.
if (!empty($this->game->url_download)) {
    if (!empty($this->game->url_download_description)) {
        $this->content .= PHP_EOL.'* ['.$this->game->url_download.' '.$this->game->url_download_description.']';
    } else {
        // Plain concatenation: the former "#{...}" form is Ruby syntax, and
        // in PHP it put a literal '#' in front of the URL.
        $this->content .= "\n* [".$this->game->url_download.' Скачать игру]';
    }
}
if (!empty($this->game->url_discussion) || !empty($this->game->url)) {
$this->content .= "\n== Ссылки ==";
}
$this->txtadd('url_discussion', '* ['.$this->game->url_discussion.' Обсуждение игры]');
$this->txtadd('url', '* ['.$this->game->url.' Страница игры]');
if (is_array($this->game->categories)) {
if (!empty($this->game->categories) && is_array($this->game->categories)) {
$this->content .= PHP_EOL;
foreach ($this->game->categories as $category) {
$this->content .= '[[Категория:'.$category.']]';
}
} else {
$this->content .= PHP_EOL.'[[Категория:'.$this->game->categories.']]';
if (!empty($this->game->categories)) {
$this->content .= PHP_EOL.'[[Категория:'.$this->game->categories.']]';
}
}
}

View file

@ -13,7 +13,7 @@ if (!isset($argv[1])) {
echo 'Please provide the direct URL to the game page.'.PHP_EOL;
die();
}
$url = strtolower($argv[1]);
$url = $argv[1];
$game = new Game;
function check($classname) {
@ -23,6 +23,8 @@ function check($classname) {
$cname = 'Source\\'.$classname;
$cl = (new $cname());
if ($cl->checkPage($url)) {
echo 'Используется правило "'.$cl->title.'".'.PHP_EOL;
$cl->loadStr($cl->get_text($url));
$game = $cl->page($url);
if ($game) {
$page = new Wikipage($game);