Archived
1
0
Fork 0

Apero + Downloader

This commit is contained in:
Alexander Yakovlev 2020-01-05 15:39:13 +07:00
parent affbb47da7
commit 2c331e9122
Signed by: oreolek
GPG key ID: 1CDC4B7820C93BD3
8 changed files with 181 additions and 50 deletions

View file

@ -29,10 +29,10 @@ class Collect extends Command
*/
protected $parsers = [
//'Anivisual',
'Hyperbook',
'HyperbookEn',
/*
//'Hyperbook',
//'HyperbookEn',
'Apero',
/*
'Questbook',
'Textadventures',
'IFDB',

View file

@ -18,19 +18,41 @@
*/
namespace App;
use Illuminate\Support\Facades\Cache;
use \GuzzleHttp\Client as GuzzleClient;
class Downloader {
public static function get_text($url) {
$curl = curl_init();
curl_setopt_array($curl, array(
CURLOPT_RETURNTRANSFER => 1,
CURLOPT_URL => $url,
));
$resp = curl_exec($curl);
curl_close($curl);
return $resp;
/**
* @var GuzzleClient
*/
protected $client;
public $cookies = '';
/**
 * Fetch a URL and return the response body as text.
 *
 * An empty $post issues a GET request; a non-empty $post is sent as a
 * form-encoded POST. The body is always written to the cache, but the
 * cache is only read back when the DEBUG environment flag is set.
 *
 * @param string $url  address to request
 * @param array  $post form fields; an empty array means GET
 * @return string response body
 */
public function get_text($url, $post = []): string {
    if (empty($this->client)) {
        $this->client = new GuzzleClient([
            'timeout' => 30,
        ]);
    }

    // A POST must not share a cache entry with a GET (or with a different
    // POST) to the same URL, so the form payload becomes part of the key.
    // Plain GET requests keep using the bare URL as the key.
    $cacheKey = $url;
    if ($post !== []) {
        $cacheKey .= '#post:' . md5(http_build_query($post));
    }

    if (env('DEBUG')) {
        // One lookup instead of has() + get(): the entry could disappear
        // between the two calls, and a null result would break the string
        // return type.
        $cached = Cache::get($cacheKey);
        if (is_string($cached)) {
            return $cached;
        }
    }

    $method = 'GET';
    $options = [
        'cookies' => $this->cookies,
    ];
    if ($post !== []) {
        $method = 'POST';
        $options['form_params'] = $post;
    }

    $response = $this->client->request($method, $url, $options);
    $resp = (string) $response->getBody();
    Cache::put($cacheKey, $resp);
    return $resp;
}
public static function download($url, $outFile) {
public function download($url, $outFile) {
$options = array(
CURLOPT_FILE => fopen($outFile, 'w'),
CURLOPT_TIMEOUT => 28800, // set this to 8 hours so we dont timeout on big files
@ -41,5 +63,22 @@ class Downloader {
curl_setopt_array($ch, $options);
curl_exec($ch);
curl_close($ch);
}
}
/**
 * Store the cookies value that get_text() and get_json() pass to the
 * HTTP client as the 'cookies' request option.
 *
 * @param mixed $cookies value to use for the 'cookies' request option
 */
public function setCookies($cookies): void {
$this->cookies = $cookies;
}
/**
 * GET a URL and decode the response body as JSON.
 *
 * @param string $url address to request
 * @return mixed whatever json_decode() produces for the response body
 */
public function get_json($url) {
    // Create the HTTP client on first use.
    if (!$this->client) {
        $this->client = new GuzzleClient(['timeout' => 30]);
    }

    $requestOptions = ['cookies' => $this->cookies];
    $body = $this->client->request('GET', $url, $requestOptions)->getBody();

    return json_decode((string) $body);
}
}

View file

@ -19,9 +19,9 @@
namespace App;
use \Symfony\Component\DomCrawler\Crawler;
use \GuzzleHttp\Client as GuzzleClient;
use App\Models\Game;
use Log;
use App\Downloader;
abstract class Source {
// Title
@ -29,7 +29,7 @@ abstract class Source {
// Optional warning or note
public $warning = FALSE;
protected $dom;
protected $cookies = '';
protected $downloader;
/**
* Whether the page should be loaded before parsing or during it
*
@ -49,34 +49,24 @@ abstract class Source {
* @return string
*/
public function get_text($url, $post = []) {
$client = new GuzzleClient([
'timeout' => 30,
]);
if ($post === []) {
$response = $client->request('GET', $url, [
'cookies' => $this->cookies,
]);
} else {
$response = $client->request('POST', $url, [
'form_params' => $post,
'cookies' => $this->cookies,
]);
if (empty($this->downloader)) {
$this->downloader = new Downloader();
}
return (string) $response->getBody();
return $this->downloader->get_text($url, $post);
}
/**
* GET JSON data.
*/
public function get_json($url) {
$client = new GuzzleClient([
'timeout' => 30,
]);
$response = $client->request('GET', $url, [
'cookies' => $this->cookies,
]);
$text = (string) $response->getBody();
return json_decode($text);
if (empty($this->downloader)) {
$this->downloader = new Downloader();
}
return $this->downloader->get_json($url);
}
/**
 * Pass cookies to the downloader used for this source's requests.
 *
 * @param mixed $cookies value forwarded unchanged to Downloader::setCookies()
 */
public function set_cookies($cookies) {
    // Cookies may be set before any request has been made, so create the
    // downloader here the same way get_text() and get_json() do.
    if (empty($this->downloader)) {
        $this->downloader = new Downloader();
    }
    // The Downloader method is named setCookies(); it has no set_cookies().
    $this->downloader->setCookies($cookies);
}
/**

View file

@ -33,6 +33,7 @@ use Log;
class Apero extends Source {
public $title = "Apero";
public $keyword = 'apero';
protected $urls = [];
public function parse() {
$text = $this->get_text('http://apero.ru/Текстовые-игры/Песочница', [
@ -47,13 +48,19 @@ class Apero extends Source {
$text = mb_convert_encoding($text, 'UTF-8', 'auto');
$this->loadStr($text);
$this->parseIndex();
foreach ($this->urls as $url) {
$text = $this->get_text($url);
$text = mb_convert_encoding($text, 'UTF-8', 'auto');
$this->loadStr($text);
$this->page($url);
}
}
public function parseIndex()
{
$this->dom->filter('.tabled-game-block')->each(function($gameBlock){
$url = trim($gameBlock->filter('h2 a')->first()->attr('href'));
$this->page($url);
$this->urls[] = $url;
});
}
@ -108,7 +115,7 @@ class Apero extends Source {
$date = NULL;
}
if (!empty($date)) {
$game->date = \DateTime::createFromFormat('Y-M-d', $date);
$game->release_date = \DateTime::createFromFormat('Y-m-d', $date);
}
// TODO description
$game->save();

View file

@ -85,22 +85,23 @@ class Steam extends Source {
return (strpos($url,'store.steampowered.com/') !== FALSE);
}
public function page($url) {
$this->cookies = new CookieJar(true);
$this->cookies->setCookie(new SetCookie([
$cookies = new CookieJar(true);
$cookies->setCookie(new SetCookie([
'Domain' => 'store.steampowered.com',
'Name' => 'mature_content',
'Value' => 1
]));
$this->cookies->setCookie(new SetCookie([
$cookies->setCookie(new SetCookie([
'Domain' => 'store.steampowered.com',
'Name' => 'Steam_Language',
'Value' => 'russian'
]));
$this->cookies->setCookie(new SetCookie([
$cookies->setCookie(new SetCookie([
'Domain' => 'store.steampowered.com',
'Name' => 'timezoneOffset',
'Value' => '0,0'
]));
$this->set_cookies($cookies);
$this->loadStr($this->get_text($url));
$game = new Game;

View file

@ -17,16 +17,17 @@
],
"require": {
"php": "^7.2",
"sabre/xml": "^2.0",
"addwiki/mediawiki-api": "^0.7.2",
"symfony/dom-crawler": "^4.0",
"symfony/css-selector": "^4.0",
"longman/telegram-bot": "dev-master",
"revolution/laravel-mastodon-api": "dev-master",
"cocur/slugify": "^3.2",
"illuminate/cache": "^6.8",
"illuminate/database": "^6.0",
"laravel-zero/framework": "^6.0",
"ryakad/pandoc-php": "^1.0"
"longman/telegram-bot": "dev-master",
"revolution/laravel-mastodon-api": "dev-master",
"ryakad/pandoc-php": "^1.0",
"sabre/xml": "^2.0",
"symfony/css-selector": "^4.0",
"symfony/dom-crawler": "^4.0"
},
"require-dev": {
"doctrine/dbal": "^2.10",

2
composer.lock generated
View file

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "9f49824b478378eb0ff2a79995bb4034",
"content-hash": "68ef1b98d458c7638b97e4dd8ce161eb",
"packages": [
{
"name": "addwiki/mediawiki-api",

93
config/cache.php Normal file
View file

@ -0,0 +1,93 @@
<?php
use Illuminate\Support\Str;
// Fallback key prefix: the slugified APP_NAME (or "ifnews") followed by
// "_cache". It is only used when CACHE_PREFIX is not set.
$fallbackPrefix = Str::slug(env('APP_NAME', 'ifnews'), '_').'_cache';

// Memcached store definition, kept separate because it is the only store
// with nested settings.
$memcachedStore = [
    'driver' => 'memcached',
    'persistent_id' => env('MEMCACHED_PERSISTENT_ID'),
    'sasl' => [
        env('MEMCACHED_USERNAME'),
        env('MEMCACHED_PASSWORD'),
    ],
    'options' => [
        // Memcached::OPT_CONNECT_TIMEOUT => 2000,
    ],
    'servers' => [
        [
            'host' => env('MEMCACHED_HOST', '127.0.0.1'),
            'port' => env('MEMCACHED_PORT', 11211),
            'weight' => 100,
        ],
    ],
];

return [

    /*
    | Default cache store.
    |
    | Name of the store used whenever a caching call does not name one
    | explicitly. Read from CACHE_DRIVER, falling back to "memcached".
    |
    | Supported: "apc", "array", "database", "file", "memcached", "redis"
    */
    'default' => env('CACHE_DRIVER', 'memcached'),

    /*
    | Cache stores.
    |
    | Every store the application can use, keyed by name, each with its
    | driver and driver-specific settings.
    */
    'stores' => [
        'apc' => ['driver' => 'apc'],

        'array' => ['driver' => 'array'],

        'database' => [
            'driver' => 'database',
            'table' => 'cache',
            'connection' => null,
        ],

        'file' => [
            'driver' => 'file',
            'path' => storage_path('framework/cache/data'),
        ],

        'memcached' => $memcachedStore,

        'redis' => [
            'driver' => 'redis',
            'connection' => 'cache',
        ],
    ],

    /*
    | Cache key prefix.
    |
    | Prepended to every key so that other applications sharing the same
    | APC or Memcached instance do not collide with ours.
    */
    'prefix' => env('CACHE_PREFIX', $fallbackPrefix),

];