Archived
1
0
Fork 0

Перевод на другой парсер HTML и парсинг URQ

This commit is contained in:
Alexander Yakovlev 2018-03-25 01:13:44 +07:00
parent b20cd53b92
commit 8a478a9d15
5 changed files with 139 additions and 10 deletions

View file

@ -1,7 +1,7 @@
<?php
namespace Source;
use \PHPHtmlParser\Dom;
use \Symfony\Component\DomCrawler\Crawler;
use \Game;
abstract class Source {
@ -10,11 +10,14 @@ abstract class Source {
protected $period;
protected $output;
/**
 * Sets up the default state of a source.
 *
 * The DOM holder starts as an empty Crawler so that it has the same type
 * as the object loadStr() installs later; the old PHPHtmlParser Dom is no
 * longer created here.
 */
public function __construct() {
    // Empty node list; loadStr() replaces it with the parsed document.
    $this->dom = new Crawler;
    // Unix timestamp of the moment one week before now.
    $this->period = strtotime("1 week ago");
    $this->output = '';
}
/**
 * Parses a markup string and makes the result the current document.
 *
 * @param string $html markup handed to the Crawler constructor
 */
public function loadStr($html) {
    $crawler = new Crawler($html);
    $this->dom = $crawler;
}
/**
* Function to start the section.
* @param whether to return or print the text

View file

@ -7,15 +7,31 @@ class Urq extends Source {
public $title = "Библиотека URQ";
/**
 * Downloads the URQ library listing and appends every listed game to
 * $this->output.
 *
 * Uses the Symfony Crawler API only (filter/each/text/attr); the previous
 * body mixed in calls from the old HTML parser (find, innerHtml,
 * getAttribute, dom->loadStr) that a Crawler does not provide.
 */
protected function parse() {
    $text = $this->get_text('http://urq.plut.info/node/209');
    $this->loadStr($text);
    unset($text);

    $rows = $this->dom->filter('.view-NewGames tr');
    // each() hands every row to the callback wrapped in its own Crawler.
    $rows->each(function ($gameBlock) {
        $link = $gameBlock->filter('.views-field-title a');
        if ($link->count() === 0) {
            // No title link in this row (presumably a header or empty row):
            // text()/attr() would throw on an empty node list, so skip it.
            return;
        }

        $game = new Game;

        $author = $gameBlock->filter('.views-field-taxonomy-vocabulary-2');
        if ($author->count() > 0) {
            $game->author = trim($author->first()->text());
        }

        $game->title = trim($gameBlock->filter('.views-field-title')->first()->text());
        $game->url = trim($link->first()->attr('href'));

        $this->output .= $game->print();
    });
}
/**
 * Tells whether a URL points to a node page of the URQ library.
 *
 * The prefix must be at the very start of the URL; a URL that merely
 * contains the prefix somewhere (for example inside a query string of
 * another host) is rejected.
 *
 * @param string $url URL to test
 * @return bool TRUE when $url starts with the URQ node prefix
 */
public function checkPage($url) {
    $prefix = 'http://urq.plut.info/node/';
    return (strpos($url, $prefix) === 0);
}
/**
 * Downloads a single game page and builds a Game from its fields.
 *
 * Author, title, description and platform are each assigned only when
 * the matching node exists on the page; calling text() on an empty node
 * list throws, so a page lacking one of the fields no longer aborts the
 * whole import.
 *
 * @param string $url address of the game page
 * @return Game game with url set and whichever fields were found
 */
public function page($url) {
    $text = $this->get_text($url);
    // loadStr() takes a single argument; the former second one was ignored.
    $this->loadStr($text);
    unset($text);

    $game = new Game;
    $game->url = $url;

    // Text of the first matched node, or null when nothing matched.
    $firstText = function ($nodes) {
        return $nodes->count() > 0 ? $nodes->first()->text() : null;
    };

    $gameBlock = $this->dom->filter('.content');

    $author = $firstText($gameBlock->filter('.field-name-taxonomy-vocabulary-2 a'));
    if ($author !== null) {
        $game->author = trim($author);
    }

    $title = $firstText($this->dom->filter('h1.title'));
    if ($title !== null) {
        $game->title = trim($title);
    }

    $description = $firstText($gameBlock->filter('.field-type-text-with-summary .field-items span.field-item'));
    if ($description !== null) {
        $game->description = $description;
    }

    $platform = $firstText($gameBlock->filter('.field-name-taxonomy-vocabulary-5 .field-items span.field-item a'));
    if ($platform !== null) {
        $game->platform = $platform;
    }

    return $game;
}
}

View file

@ -68,7 +68,7 @@ class Wikipage {
protected function makeContent() {
$this->content = '{{game info';
$this->txtadd('title', ' |название='.$this->game->title.'"');
$this->txtadd('title', ' |название='.$this->game->title);
if (is_array($this->game->author) && count($this->game->author) === 1) {
$this->game->author = trim($this->game->author[0]);
}
@ -93,7 +93,7 @@ class Wikipage {
$this->content .= "\n}}\n";
$this->txtadd('description', "#{@game.description}");
$this->txtadd('description', $this->game->description);
if (!empty($this->game->url_download) || !empty($this->game->url_online)) {
$this->content .= "\n== Версии ==";
}

View file

@ -1,9 +1,10 @@
{
"require": {
"paquettg/php-html-parser": "^1.7",
"aura/autoload": "^2.0",
"sabre/xml": "^2.0",
"league/html-to-markdown": "^4.4",
"addwiki/mediawiki-api": "^0.7.2"
"addwiki/mediawiki-api": "^0.7.2",
"symfony/dom-crawler": "^4.0",
"symfony/css-selector": "^4.0"
}
}

111
composer.lock generated
View file

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
"content-hash": "f18a94a205dd3f539f5d191093fc70e7",
"content-hash": "9926cb2730d267c41d2f5879b63b3b1d",
"packages": [
{
"name": "addwiki/mediawiki-api",
@ -917,6 +917,59 @@
"homepage": "https://symfony.com",
"time": "2017-07-29T21:27:59+00:00"
},
{
"name": "symfony/css-selector",
"version": "v4.0.6",
"source": {
"type": "git",
"url": "https://github.com/symfony/css-selector.git",
"reference": "c69f1e93aa898fd9fec627ebef467188151c8dc2"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/css-selector/zipball/c69f1e93aa898fd9fec627ebef467188151c8dc2",
"reference": "c69f1e93aa898fd9fec627ebef467188151c8dc2",
"shasum": ""
},
"require": {
"php": "^7.1.3"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "4.0-dev"
}
},
"autoload": {
"psr-4": {
"Symfony\\Component\\CssSelector\\": ""
},
"exclude-from-classmap": [
"/Tests/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Jean-François Simon",
"email": "jeanfrancois.simon@sensiolabs.com"
},
{
"name": "Fabien Potencier",
"email": "fabien@symfony.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony CssSelector Component",
"homepage": "https://symfony.com",
"time": "2018-02-03T14:58:37+00:00"
},
{
"name": "symfony/debug",
"version": "v3.3.6",
@ -973,6 +1026,62 @@
"homepage": "https://symfony.com",
"time": "2017-07-28T15:27:31+00:00"
},
{
"name": "symfony/dom-crawler",
"version": "v4.0.6",
"source": {
"type": "git",
"url": "https://github.com/symfony/dom-crawler.git",
"reference": "26726ddc01601dc9393f2afc3369ce1ca64e4537"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/dom-crawler/zipball/26726ddc01601dc9393f2afc3369ce1ca64e4537",
"reference": "26726ddc01601dc9393f2afc3369ce1ca64e4537",
"shasum": ""
},
"require": {
"php": "^7.1.3",
"symfony/polyfill-mbstring": "~1.0"
},
"require-dev": {
"symfony/css-selector": "~3.4|~4.0"
},
"suggest": {
"symfony/css-selector": ""
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "4.0-dev"
}
},
"autoload": {
"psr-4": {
"Symfony\\Component\\DomCrawler\\": ""
},
"exclude-from-classmap": [
"/Tests/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Fabien Potencier",
"email": "fabien@symfony.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony DomCrawler Component",
"homepage": "https://symfony.com",
"time": "2018-02-22T10:50:29+00:00"
},
{
"name": "symfony/event-dispatcher",
"version": "v3.3.6",