Archived
1
0
Fork 0

Парсинг Anivisual

This commit is contained in:
Alexander Yakovlev 2019-11-30 13:08:17 +07:00
parent 266d7c369d
commit 8031fb239f
Signed by: oreolek
GPG key ID: 1CDC4B7820C93BD3
11 changed files with 473 additions and 49 deletions

View file

@ -29,6 +29,7 @@ class Collect extends Command
*/
protected $parsers = [
'Anivisual',
/*
'Apero',
'Hyperbook',
'Questbook',
@ -36,7 +37,6 @@ class Collect extends Command
'IFDB',
'Itch',
'Steam',
/*
'Urq',
'Kvester',
'vndb',
@ -61,10 +61,7 @@ class Collect extends Command
Log::debug($parser);
try {
$parser = new $cname();
$games = $parser->parse();
foreach ($games as $game) {
$parser->saveGame($game);
}
$parser->parse();
} catch (\Exception $e) {
Log::error($e->getMessage());
Log::debug($e->getTraceAsString());

View file

@ -7,9 +7,14 @@ use App\Models\Game;
class Author extends Model
{
protected $table = 'authors';
protected $table = 'authors';
public $timestamps = FALSE;
public function games() {
return $this->belongsToMany(Game::class, 'authors_games');
}
}
public static function findByName($name) {
return self::where('name', $name)->first();
}
}

View file

@ -10,7 +10,8 @@ use App\Models\Platform;
class Game extends Model
{
protected $table = 'games';
protected $table = 'games';
public $guarded = ['id'];
public function authors() {
return $this->belongsToMany(Author::class, 'authors_games');

View file

@ -8,7 +8,8 @@ use App\Models\Tag;
class Language extends Model
{
protected $table = 'languages';
protected $table = 'languages';
public $timestamps = false;
public function games() {
return $this->belongsToMany(Game::class, 'languages_games');
@ -16,5 +17,9 @@ class Language extends Model
public function tags() {
return $this->hasMany(Tag::class);
}
}
public static function findByCode($code) {
return self::where('code', $code)->first();
}
}

View file

@ -89,23 +89,21 @@ abstract class Source {
}
/**
* Save the game if not a duplicate.
* Find if we already have the game model.
*/
public function saveGame(Game $game) {
Log::debug('Saving game '.$game->title);
$game->source = get_class($this);
public function findGame(Game $game): Game {
$game->source = (new \ReflectionClass($this))->getShortName();
$dbmodel = NULL;
if (isset($game->source_id)) {
$dbmodel = Game::where('source', $game->source)
->where('source_id', $game->source_id)
->first();
}
if ($dbmodel) {
$dbmodel->fill($game);
$dbmodel->save();
} else {
$game->save();
}
if (isset($game->source_id)) {
$dbmodel = Game::where('source', $game->source)
->where('source_id', $game->source_id)
->first();
if ($dbmodel) {
$dbmodel->fill($game->toArray());
return $dbmodel;
}
}
return $game;
}
/**
@ -115,7 +113,7 @@ abstract class Source {
$date = Game::where('source', self::class)
->orderBy('created_at', 'desc')
->limit(1)
->value('created_at');
->value('release_date');
if (!$date) {
return NULL;
}

View file

@ -19,7 +19,11 @@
namespace App\Sources;
use \App\Models\Game;
use \App\Models\Language;
use \App\Models\Author;
use \App\Source;
use Symfony\Component\DomCrawler\Crawler;
use Log;
class Anivisual extends Source {
public $title = "Anivisual";
@ -43,8 +47,8 @@ class Anivisual extends Source {
$this->loadStr($text);
unset($text);
$lastDate = $this->getLastDate();
$games = [];
$this->dom->filter('.entryBlock')->each(function($gameBlock) use($lastDate, &$games) {
$this->dom->filter('.entryBlock')->each(function($gameBlock) use($lastDate) {
// Check that the game date is after the date of the last parsed game
$date = trim($gameBlock->filter('.icon-calendar')->text());
foreach ($this->months as $ruM => $enM) {
$date = str_replace($ruM, $enM, $date);
@ -52,15 +56,13 @@ class Anivisual extends Source {
$date = \DateTime::createFromFormat('d F Y', $date);
if (!empty($lastDate) && $date >= $lastDate) {
return;
}
$game->date = $date;
}
// Get the game link
$link = $gameBlock->filter('.novel-ttl a')->first();
$link = 'http://anivisual.net'.$link->attr('href');
$game = $this->page($link);
$games[] = $game;
$this->page($link);
});
return $games;
}
public function checkPage($url) {
@ -70,9 +72,13 @@ class Anivisual extends Source {
public function page($url) {
$text = $this->get_text($url);
$this->loadStr($text);
unset($text);
$game = new Game;
$game->url = $url;
unset($text);
$game = new Game();
$game->url = $url;
$game->source_id = str_replace('http://anivisual.net/stuff/', '', $url);
$game = $this->findGame($game);
$gameBlock = $this->dom->filter('#casing-box');
$dateBlock = $this->dom->filter('.icon-calendar');
$date = '';
@ -83,7 +89,7 @@ class Anivisual extends Source {
foreach ($this->months as $ruM => $enM) {
$date = str_replace($ruM, $enM, $date);
}
$game->date = \DateTime::createFromFormat('d F Y', $date);
$game->release_date = \DateTime::createFromFormat('d F Y', $date);
unset($date);
}
$title = $this->dom->filter('h1.logo')->first();
@ -93,24 +99,88 @@ class Anivisual extends Source {
$game->description = $this->dom->filter('#content > section > span')->first()->text();
$game->description = str_replace('(adsbygoogle = window.adsbygoogle || []).push({});', '', $game->description);
$game->description = str_replace('Доп. ссылки: Доступно только для пользователей', '', $game->description);
$game->description = trim($game->description);
$game->description = trim($game->description);
$game->save();
$language = Language::findByCode('ru');
if (!$game->languages()->where('code', 'ru')->exists()) {
$game->languages()->attach($language);
$game->save();
}
$sidebar = $gameBlock->filter('#sidebar')->first()->html();
$pos_start = mb_strpos($sidebar, '<b>Автор:');
$sidebar_search = trim(mb_substr($sidebar, $pos_start));
$pos_end = mb_strpos($sidebar_search, '<br>');
$sidebar_search = trim(mb_substr($sidebar_search, 0, $pos_end));
$sidebar_search = str_replace('<b>Автор:</b>', '', $sidebar_search);
$game->author = trim(strip_tags($sidebar_search));
$sidebar_search = str_replace('<b>Автор:</b>', '', $sidebar_search);
$author = trim(strip_tags($sidebar_search));
$author_url = $this->getLink($sidebar_search);
if (!empty($author)) {
$author_model = Author::findByName($author);
if (empty($author_model)) {
$author_model = new Author();
$author_model->name = $author;
$author_model->url = $author_url;
}
$author = $author_model;
unset($author_model);
}
if (!empty($author)) {
$author->save();
if (!$game->authors()->where('name', $author->name)->exists()) {
$game->authors()->attach($author);
$game->save();
}
}
$pos_start = mb_strpos($sidebar, '<b>Перевод:');
$sidebar_search = trim(mb_substr($sidebar, $pos_start));
$pos_end = mb_strpos($sidebar_search, '<br>');
$sidebar_search = trim(mb_substr($sidebar_search, 0, $pos_end));
$sidebar_search = trim(strip_tags(str_replace('<b>Перевод:</b>', '', $sidebar_search)));
$sidebar_search = trim(mb_substr($sidebar_search, 0, $pos_end));
$sidebar_search = str_replace('<b>Перевод:</b>', '', $sidebar_search);
$sidebar_search = trim(strip_tags($sidebar_search));
$author_url = $this->getLink($sidebar_search);
if ($sidebar_search !== '') {
$game->author .= ', пер. '.$sidebar_search;
}
return $game;
}
$language = Language::findByCode('en');
if (!$game->languages()->where('code', 'en')->exists()) {
$game->languages()->attach($language);
}
$author = Author::findByName($sidebar_search);
if (!$author) {
$author = new Author();
$author->name = $sidebar_search;
$author->is_translator = true;
$author->url = $author_url;
$author->save();
}
if (!$game->authors()->where('name', $author->name)->exists()) {
$game->authors()->attach($author);
$game->save();
}
}
Log::info($game->title);
}
/**
 * Extract the target of the first hyperlink found in an HTML fragment.
 *
 * @param string $html HTML fragment to inspect
 * @return string the first anchor's href with every '/go?' occurrence
 *                removed, or an empty string when the fragment is empty,
 *                contains no anchor, or the anchor carries no href
 */
protected function getLink($html) {
    if (empty($html)) {
        return '';
    }
    // Run the selector once and reuse the node list for both the
    // presence check and the lookup of the first match.
    $anchors = (new Crawler($html))->filter('a');
    if ($anchors->count() === 0) {
        return '';
    }
    $href = $anchors->first()->attr('href');
    if (empty($href)) {
        // A missing href yields null from attr(); the cast turns it into ''.
        return (string) $href;
    }
    return (string) str_replace('/go?', '', $href);
}
}

View file

@ -29,6 +29,7 @@
"ryakad/pandoc-php": "^1.0"
},
"require-dev": {
"doctrine/dbal": "^2.10",
"illuminate/log": "^6.6",
"mockery/mockery": "^1.0",
"monolog/monolog": "^2.0",

252
composer.lock generated
View file

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "9e81c0e5392fa678dfc2853861f439d0",
"content-hash": "9f49824b478378eb0ff2a79995bb4034",
"packages": [
{
"name": "addwiki/mediawiki-api",
@ -3445,6 +3445,256 @@
],
"time": "2019-11-06T16:40:04+00:00"
},
{
"name": "doctrine/cache",
"version": "1.10.0",
"source": {
"type": "git",
"url": "https://github.com/doctrine/cache.git",
"reference": "382e7f4db9a12dc6c19431743a2b096041bcdd62"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/doctrine/cache/zipball/382e7f4db9a12dc6c19431743a2b096041bcdd62",
"reference": "382e7f4db9a12dc6c19431743a2b096041bcdd62",
"shasum": ""
},
"require": {
"php": "~7.1"
},
"conflict": {
"doctrine/common": ">2.2,<2.4"
},
"require-dev": {
"alcaeus/mongo-php-adapter": "^1.1",
"doctrine/coding-standard": "^6.0",
"mongodb/mongodb": "^1.1",
"phpunit/phpunit": "^7.0",
"predis/predis": "~1.0"
},
"suggest": {
"alcaeus/mongo-php-adapter": "Required to use legacy MongoDB driver"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.9.x-dev"
}
},
"autoload": {
"psr-4": {
"Doctrine\\Common\\Cache\\": "lib/Doctrine/Common/Cache"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Guilherme Blanco",
"email": "guilhermeblanco@gmail.com"
},
{
"name": "Roman Borschel",
"email": "roman@code-factory.org"
},
{
"name": "Benjamin Eberlei",
"email": "kontakt@beberlei.de"
},
{
"name": "Jonathan Wage",
"email": "jonwage@gmail.com"
},
{
"name": "Johannes Schmitt",
"email": "schmittjoh@gmail.com"
}
],
"description": "PHP Doctrine Cache library is a popular cache implementation that supports many different drivers such as redis, memcache, apc, mongodb and others.",
"homepage": "https://www.doctrine-project.org/projects/cache.html",
"keywords": [
"abstraction",
"apcu",
"cache",
"caching",
"couchdb",
"memcached",
"php",
"redis",
"xcache"
],
"time": "2019-11-29T15:36:20+00:00"
},
{
"name": "doctrine/dbal",
"version": "v2.10.0",
"source": {
"type": "git",
"url": "https://github.com/doctrine/dbal.git",
"reference": "0c9a646775ef549eb0a213a4f9bd4381d9b4d934"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/doctrine/dbal/zipball/0c9a646775ef549eb0a213a4f9bd4381d9b4d934",
"reference": "0c9a646775ef549eb0a213a4f9bd4381d9b4d934",
"shasum": ""
},
"require": {
"doctrine/cache": "^1.0",
"doctrine/event-manager": "^1.0",
"ext-pdo": "*",
"php": "^7.2"
},
"require-dev": {
"doctrine/coding-standard": "^6.0",
"jetbrains/phpstorm-stubs": "^2019.1",
"phpstan/phpstan": "^0.11.3",
"phpunit/phpunit": "^8.4.1",
"symfony/console": "^2.0.5|^3.0|^4.0|^5.0"
},
"suggest": {
"symfony/console": "For helpful console commands such as SQL execution and import of files."
},
"bin": [
"bin/doctrine-dbal"
],
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.10.x-dev",
"dev-develop": "3.0.x-dev"
}
},
"autoload": {
"psr-4": {
"Doctrine\\DBAL\\": "lib/Doctrine/DBAL"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Guilherme Blanco",
"email": "guilhermeblanco@gmail.com"
},
{
"name": "Roman Borschel",
"email": "roman@code-factory.org"
},
{
"name": "Benjamin Eberlei",
"email": "kontakt@beberlei.de"
},
{
"name": "Jonathan Wage",
"email": "jonwage@gmail.com"
}
],
"description": "Powerful PHP database abstraction layer (DBAL) with many features for database schema introspection and management.",
"homepage": "https://www.doctrine-project.org/projects/dbal.html",
"keywords": [
"abstraction",
"database",
"db2",
"dbal",
"mariadb",
"mssql",
"mysql",
"oci8",
"oracle",
"pdo",
"pgsql",
"postgresql",
"queryobject",
"sasql",
"sql",
"sqlanywhere",
"sqlite",
"sqlserver",
"sqlsrv"
],
"time": "2019-11-03T16:50:43+00:00"
},
{
"name": "doctrine/event-manager",
"version": "1.1.0",
"source": {
"type": "git",
"url": "https://github.com/doctrine/event-manager.git",
"reference": "629572819973f13486371cb611386eb17851e85c"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/doctrine/event-manager/zipball/629572819973f13486371cb611386eb17851e85c",
"reference": "629572819973f13486371cb611386eb17851e85c",
"shasum": ""
},
"require": {
"php": "^7.1"
},
"conflict": {
"doctrine/common": "<2.9@dev"
},
"require-dev": {
"doctrine/coding-standard": "^6.0",
"phpunit/phpunit": "^7.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
},
"autoload": {
"psr-4": {
"Doctrine\\Common\\": "lib/Doctrine/Common"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Guilherme Blanco",
"email": "guilhermeblanco@gmail.com"
},
{
"name": "Roman Borschel",
"email": "roman@code-factory.org"
},
{
"name": "Benjamin Eberlei",
"email": "kontakt@beberlei.de"
},
{
"name": "Jonathan Wage",
"email": "jonwage@gmail.com"
},
{
"name": "Johannes Schmitt",
"email": "schmittjoh@gmail.com"
},
{
"name": "Marco Pivetta",
"email": "ocramius@gmail.com"
}
],
"description": "The Doctrine Event Manager is a simple PHP event system that was built to be used with the various Doctrine projects.",
"homepage": "https://www.doctrine-project.org/projects/event-manager.html",
"keywords": [
"event",
"event dispatcher",
"event manager",
"event system",
"events"
],
"time": "2019-11-10T09:48:07+00:00"
},
{
"name": "doctrine/instantiator",
"version": "1.3.0",

View file

@ -38,7 +38,7 @@ return [
|
*/
'production' => env('DEBUG'),
'production' => false,
/*
|--------------------------------------------------------------------------

View file

@ -0,0 +1,63 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
use App\Models\Language;
/**
 * Seeds the languages table with the languages the collectors attach to games.
 */
class Languages extends Migration
{
    /**
     * Rows inserted by up() and removed again by down().
     *
     * NOTE(review): 'cn' is kept as-is because other code may look languages
     * up by this code; the ISO 639-1 code for Chinese is 'zh' - confirm
     * before changing it.
     */
    private const LANGUAGES = [
        ['code' => 'ru', 'title_en' => 'Russian', 'title_ru' => 'Русский'],
        ['code' => 'en', 'title_en' => 'English', 'title_ru' => 'Английский'],
        // The English title of this row used to be 'Deutsch'.
        ['code' => 'de', 'title_en' => 'German', 'title_ru' => 'Немецкий'],
        ['code' => 'fr', 'title_en' => 'French', 'title_ru' => 'Французский'],
        ['code' => 'cn', 'title_en' => 'Chinese', 'title_ru' => 'Китайский'],
        ['code' => 'ja', 'title_en' => 'Japanese', 'title_ru' => 'Японский'],
    ];

    /**
     * Run the migrations.
     *
     * Inserts one Language row per entry of self::LANGUAGES.
     *
     * @return void
     */
    public function up()
    {
        foreach (self::LANGUAGES as $attributes) {
            $language = new Language();
            $language->code = $attributes['code'];
            $language->title_en = $attributes['title_en'];
            $language->title_ru = $attributes['title_ru'];
            $language->save();
        }
    }

    /**
     * Reverse the migrations.
     *
     * Deletes only the rows seeded by up(). The delete is issued through the
     * query builder: Language::all() returns a collection, which has no
     * delete() method, so the previous implementation failed on rollback.
     *
     * @return void
     */
    public function down()
    {
        Language::query()
            ->whereIn('code', array_column(self::LANGUAGES, 'code'))
            ->delete();
    }
}

View file

@ -0,0 +1,34 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
/**
 * Relaxes the authors table so parsers can store partially known authors,
 * and adds a flag marking translators.
 */
class Authors extends Migration
{
    /**
     * Run the migrations.
     *
     * Makes url, is_company, is_person and description nullable and adds
     * the nullable is_translator flag.
     *
     * @return void
     */
    public function up()
    {
        Schema::table('authors', function (Blueprint $table) {
            $table->string('url')->nullable()->change();
            $table->boolean('is_company')->nullable()->change();
            $table->boolean('is_person')->nullable()->change();
            $table->text('description')->nullable()->change();
            $table->boolean('is_translator')->nullable();
        });
    }

    /**
     * Reverse the migrations.
     *
     * Drops the is_translator column added by up(). The nullability changes
     * are left in place: the previous column definitions are not visible
     * here, and tightening them could fail on rows that already hold NULLs.
     *
     * @return void
     */
    public function down()
    {
        // Guard so a partially applied up() can still be rolled back.
        if (!Schema::hasColumn('authors', 'is_translator')) {
            return;
        }
        Schema::table('authors', function (Blueprint $table) {
            $table->dropColumn('is_translator');
        });
    }
}