Apero + Downloader
This commit is contained in:
parent
affbb47da7
commit
2c331e9122
|
@ -29,10 +29,10 @@ class Collect extends Command
|
|||
*/
|
||||
protected $parsers = [
|
||||
//'Anivisual',
|
||||
'Hyperbook',
|
||||
'HyperbookEn',
|
||||
/*
|
||||
//'Hyperbook',
|
||||
//'HyperbookEn',
|
||||
'Apero',
|
||||
/*
|
||||
'Questbook',
|
||||
'Textadventures',
|
||||
'IFDB',
|
||||
|
|
|
@ -18,19 +18,41 @@
|
|||
*/
|
||||
namespace App;
|
||||
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
use \GuzzleHttp\Client as GuzzleClient;
|
||||
|
||||
class Downloader {
|
||||
public static function get_text($url) {
|
||||
$curl = curl_init();
|
||||
curl_setopt_array($curl, array(
|
||||
CURLOPT_RETURNTRANSFER => 1,
|
||||
CURLOPT_URL => $url,
|
||||
));
|
||||
$resp = curl_exec($curl);
|
||||
curl_close($curl);
|
||||
return $resp;
|
||||
/**
|
||||
* @var GuzzleClient
|
||||
*/
|
||||
protected $client;
|
||||
public $cookies = '';
|
||||
|
||||
/**
 * Fetch a URL and return the response body as text.
 *
 * Sends a GET request when $post is empty, otherwise a form-encoded POST.
 * The current value of $this->cookies is passed as the 'cookies' request
 * option. Every response body is written to the cache; a cached copy is
 * only served back when the DEBUG environment flag is set.
 *
 * @param string $url  Address to request.
 * @param array  $post Form fields; an empty array means a GET request.
 *
 * @return string Response body.
 */
public function get_text($url, $post = []): string {
    if (empty($this->client)) {
        $this->client = new GuzzleClient([
            'timeout' => 30,
        ]);
    }

    // Key on the URL *and* the form payload: keyed on the URL alone, a POST
    // would be answered with the cached GET page (or with the response to a
    // different form submission). Hashing also keeps the key short and free
    // of whitespace, which stores such as memcached require.
    $cacheKey = 'downloader:' . sha1(serialize([$url, $post]));

    if (env('DEBUG') && Cache::has($cacheKey)) {
        return Cache::get($cacheKey);
    }

    $options = [
        'cookies' => $this->cookies,
    ];
    if ($post === []) {
        $response = $this->client->request('GET', $url, $options);
    } else {
        $options['form_params'] = $post;
        $response = $this->client->request('POST', $url, $options);
    }

    $resp = (string) $response->getBody();
    Cache::put($cacheKey, $resp);
    return $resp;
}
|
||||
|
||||
public static function download($url, $outFile) {
|
||||
public function download($url, $outFile) {
|
||||
$options = array(
|
||||
CURLOPT_FILE => fopen($outFile, 'w'),
|
||||
CURLOPT_TIMEOUT => 28800, // set this to 8 hours so we dont timeout on big files
|
||||
|
@ -41,5 +63,22 @@ class Downloader {
|
|||
curl_setopt_array($ch, $options);
|
||||
curl_exec($ch);
|
||||
curl_close($ch);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Store the cookie setting that later requests pass as their 'cookies' option.
 *
 * @param mixed $cookies Value assigned to $this->cookies as-is.
 */
public function setCookies($cookies): void {
    $this->cookies = $cookies;
}
|
||||
|
||||
/**
 * GET a URL and decode the response body as JSON.
 *
 * @param string $url Address to request.
 *
 * @return mixed Whatever json_decode() yields for the response body.
 */
public function get_json($url) {
    // Create the HTTP client on first use.
    if (empty($this->client)) {
        $clientConfig = ['timeout' => 30];
        $this->client = new GuzzleClient($clientConfig);
    }

    $requestOptions = ['cookies' => $this->cookies];
    $body = $this->client->request('GET', $url, $requestOptions)->getBody();

    return json_decode((string) $body);
}
|
||||
}
|
||||
|
|
|
@ -19,9 +19,9 @@
|
|||
namespace App;
|
||||
|
||||
use \Symfony\Component\DomCrawler\Crawler;
|
||||
use \GuzzleHttp\Client as GuzzleClient;
|
||||
use App\Models\Game;
|
||||
use Log;
|
||||
use App\Downloader;
|
||||
|
||||
abstract class Source {
|
||||
// Title
|
||||
|
@ -29,7 +29,7 @@ abstract class Source {
|
|||
// Optional warning or note
|
||||
public $warning = FALSE;
|
||||
protected $dom;
|
||||
protected $cookies = '';
|
||||
protected $downloader;
|
||||
/**
|
||||
* Whether to load the page before parsing or during it
|
||||
*
|
||||
|
@ -49,34 +49,24 @@ abstract class Source {
|
|||
* @return string
|
||||
*/
|
||||
public function get_text($url, $post = []) {
|
||||
$client = new GuzzleClient([
|
||||
'timeout' => 30,
|
||||
]);
|
||||
if ($post === []) {
|
||||
$response = $client->request('GET', $url, [
|
||||
'cookies' => $this->cookies,
|
||||
]);
|
||||
} else {
|
||||
$response = $client->request('POST', $url, [
|
||||
'form_params' => $post,
|
||||
'cookies' => $this->cookies,
|
||||
]);
|
||||
if (empty($this->downloader)) {
|
||||
$this->downloader = new Downloader();
|
||||
}
|
||||
return (string) $response->getBody();
|
||||
return $this->downloader->get_text($url, $post);
|
||||
}
|
||||
|
||||
/**
|
||||
* GET JSON data.
|
||||
*/
|
||||
public function get_json($url) {
|
||||
$client = new GuzzleClient([
|
||||
'timeout' => 30,
|
||||
]);
|
||||
$response = $client->request('GET', $url, [
|
||||
'cookies' => $this->cookies,
|
||||
]);
|
||||
$text = (string) $response->getBody();
|
||||
return json_decode($text);
|
||||
if (empty($this->downloader)) {
|
||||
$this->downloader = new Downloader();
|
||||
}
|
||||
return $this->downloader->get_json($url);
|
||||
}
|
||||
|
||||
/**
 * Set the cookies that the downloader sends with subsequent requests.
 *
 * @param mixed $cookies Cookie setting forwarded to Downloader::setCookies().
 */
public function set_cookies($cookies) {
    // This can be called before any request has been made, so create the
    // downloader on demand the same way get_text() and get_json() do.
    if (empty($this->downloader)) {
        $this->downloader = new Downloader();
    }
    // The Downloader method is named setCookies(); it has no set_cookies().
    $this->downloader->setCookies($cookies);
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -33,6 +33,7 @@ use Log;
|
|||
class Apero extends Source {
|
||||
public $title = "Apero";
|
||||
public $keyword = 'apero';
|
||||
protected $urls = [];
|
||||
|
||||
public function parse() {
|
||||
$text = $this->get_text('http://apero.ru/Текстовые-игры/Песочница', [
|
||||
|
@ -47,13 +48,19 @@ class Apero extends Source {
|
|||
$text = mb_convert_encoding($text, 'UTF-8', 'auto');
|
||||
$this->loadStr($text);
|
||||
$this->parseIndex();
|
||||
foreach ($this->urls as $url) {
|
||||
$text = $this->get_text($url);
|
||||
$text = mb_convert_encoding($text, 'UTF-8', 'auto');
|
||||
$this->loadStr($text);
|
||||
$this->page($url);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Collect game page URLs from the currently loaded index page.
 *
 * For every ".tabled-game-block" element, the href of its first "h2 a"
 * link is appended to $this->urls. Blocks without such a link, or with an
 * empty href, are skipped.
 *
 * page() is deliberately not called here: parse() loads each collected URL
 * and calls page() itself, whereas at this point the DOM still holds the
 * index page.
 */
public function parseIndex()
{
    $this->dom->filter('.tabled-game-block')->each(function ($gameBlock) {
        $link = $gameBlock->filter('h2 a');
        // attr() on an empty node list throws, so skip blocks without a title link.
        if ($link->count() === 0) {
            return;
        }
        $url = trim((string) $link->first()->attr('href'));
        if ($url === '') {
            return;
        }
        $this->urls[] = $url;
    });
}
|
||||
|
||||
|
@ -108,7 +115,7 @@ class Apero extends Source {
|
|||
$date = NULL;
|
||||
}
|
||||
if (!empty($date)) {
|
||||
$game->date = \DateTime::createFromFormat('Y-M-d', $date);
|
||||
$game->release_date = \DateTime::createFromFormat('Y-m-d', $date);
|
||||
}
|
||||
// TODO description
|
||||
$game->save();
|
||||
|
|
|
@ -85,22 +85,23 @@ class Steam extends Source {
|
|||
return (strpos($url,'store.steampowered.com/') !== FALSE);
|
||||
}
|
||||
public function page($url) {
|
||||
$this->cookies = new CookieJar(true);
|
||||
$this->cookies->setCookie(new SetCookie([
|
||||
$cookies = new CookieJar(true);
|
||||
$cookies->setCookie(new SetCookie([
|
||||
'Domain' => 'store.steampowered.com',
|
||||
'Name' => 'mature_content',
|
||||
'Value' => 1
|
||||
]));
|
||||
$this->cookies->setCookie(new SetCookie([
|
||||
$cookies->setCookie(new SetCookie([
|
||||
'Domain' => 'store.steampowered.com',
|
||||
'Name' => 'Steam_Language',
|
||||
'Value' => 'russian'
|
||||
]));
|
||||
$this->cookies->setCookie(new SetCookie([
|
||||
$cookies->setCookie(new SetCookie([
|
||||
'Domain' => 'store.steampowered.com',
|
||||
'Name' => 'timezoneOffset',
|
||||
'Value' => '0,0'
|
||||
]));
|
||||
$this->set_cookies($cookies);
|
||||
$this->loadStr($this->get_text($url));
|
||||
|
||||
$game = new Game;
|
||||
|
|
|
@ -17,16 +17,17 @@
|
|||
],
|
||||
"require": {
|
||||
"php": "^7.2",
|
||||
"sabre/xml": "^2.0",
|
||||
"addwiki/mediawiki-api": "^0.7.2",
|
||||
"symfony/dom-crawler": "^4.0",
|
||||
"symfony/css-selector": "^4.0",
|
||||
"longman/telegram-bot": "dev-master",
|
||||
"revolution/laravel-mastodon-api": "dev-master",
|
||||
"cocur/slugify": "^3.2",
|
||||
"illuminate/cache": "^6.8",
|
||||
"illuminate/database": "^6.0",
|
||||
"laravel-zero/framework": "^6.0",
|
||||
"ryakad/pandoc-php": "^1.0"
|
||||
"longman/telegram-bot": "dev-master",
|
||||
"revolution/laravel-mastodon-api": "dev-master",
|
||||
"ryakad/pandoc-php": "^1.0",
|
||||
"sabre/xml": "^2.0",
|
||||
"symfony/css-selector": "^4.0",
|
||||
"symfony/dom-crawler": "^4.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"doctrine/dbal": "^2.10",
|
||||
|
|
2
composer.lock
generated
2
composer.lock
generated
|
@ -4,7 +4,7 @@
|
|||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"content-hash": "9f49824b478378eb0ff2a79995bb4034",
|
||||
"content-hash": "68ef1b98d458c7638b97e4dd8ce161eb",
|
||||
"packages": [
|
||||
{
|
||||
"name": "addwiki/mediawiki-api",
|
||||
|
|
93
config/cache.php
Normal file
93
config/cache.php
Normal file
|
@ -0,0 +1,93 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Support\Str;
|
||||
|
||||
return [
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Default Cache Store
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| This option controls the default cache connection that gets used while
|
||||
| using this caching library. This connection is used when another is
|
||||
| not explicitly specified when executing a given caching function.
|
||||
|
|
||||
| Supported: "apc", "array", "database", "file", "memcached", "redis"
|
||||
|
|
||||
*/
|
||||
|
||||
'default' => env('CACHE_DRIVER', 'memcached'),
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Cache Stores
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| Here you may define all of the cache "stores" for your application as
|
||||
| well as their drivers. You may even define multiple stores for the
|
||||
| same cache driver to group types of items stored in your caches.
|
||||
|
|
||||
*/
|
||||
|
||||
'stores' => [
|
||||
|
||||
'apc' => [
|
||||
'driver' => 'apc',
|
||||
],
|
||||
|
||||
'array' => [
|
||||
'driver' => 'array',
|
||||
],
|
||||
|
||||
'database' => [
|
||||
'driver' => 'database',
|
||||
'table' => 'cache',
|
||||
'connection' => null,
|
||||
],
|
||||
|
||||
'file' => [
|
||||
'driver' => 'file',
|
||||
'path' => storage_path('framework/cache/data'),
|
||||
],
|
||||
|
||||
'memcached' => [
|
||||
'driver' => 'memcached',
|
||||
'persistent_id' => env('MEMCACHED_PERSISTENT_ID'),
|
||||
'sasl' => [
|
||||
env('MEMCACHED_USERNAME'),
|
||||
env('MEMCACHED_PASSWORD'),
|
||||
],
|
||||
'options' => [
|
||||
// Memcached::OPT_CONNECT_TIMEOUT => 2000,
|
||||
],
|
||||
'servers' => [
|
||||
[
|
||||
'host' => env('MEMCACHED_HOST', '127.0.0.1'),
|
||||
'port' => env('MEMCACHED_PORT', 11211),
|
||||
'weight' => 100,
|
||||
],
|
||||
],
|
||||
],
|
||||
|
||||
'redis' => [
|
||||
'driver' => 'redis',
|
||||
'connection' => 'cache',
|
||||
],
|
||||
|
||||
],
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Cache Key Prefix
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| When utilizing a RAM based store such as APC or Memcached, there might
|
||||
| be other applications utilizing the same cache. So, we'll specify a
|
||||
| value to get prefixed to all our keys so we can avoid collisions.
|
||||
|
|
||||
*/
|
||||
|
||||
'prefix' => env('CACHE_PREFIX', Str::slug(env('APP_NAME', 'ifnews'), '_').'_cache'),
|
||||
|
||||
];
|
Reference in a new issue