- 01
- 02
- 03
- 04
- 05
- 06
- 07
- 08
- 09
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
// Locate the profile avatar <img>. The XPath expressions are tried in order and
// the first one that yields at least one node wins (presumably each expression
// targets a different revision of the archived profile markup — TODO confirm).
$avatarQueries = [
    "//div[@class='identity']/img",
    "//h1[@class='avatared']/a/img",
    "//div[@class='avatared']/a/img",
    "//div[@itemtype='http://schema.org/Person']/a/img",
];
foreach ($avatarQueries as $avatarQuery) {
    $entries = $xpath->query($avatarQuery);
    if (isset($entries[0])) {
        break;
    }
}

// None of the expressions matched: move on to the next iteration of the enclosing loop.
if (!isset($entries[0])) {
    continue;
}

$src = $entries[0]->getAttribute('src');

// The hash is 32 lowercase hex digits preceded by "/" or "=" and followed by "?" or "&".
// An image URL without such a fragment is skipped as well.
if (preg_match('#[/=]([0-9a-f]{32})[\?&]#', $src, $matches) !== 1) {
    continue;
}
$hash = $matches[1];
// (a few lines later — the code in between is omitted from this excerpt)

// Preferred source: an inline <script> inside the "email" element, looked up
// under two alternative parent elements.
$entries = $xpath->query("//div[@class='email']/script");
if (!isset($entries[0])) {
    $entries = $xpath->query("//dl/dd[@class='email']/script");
}

if (isset($entries[0])) {
    $rawcode = $entries[0]->textContent;

    // The script is expected to look like eval(decodeURIComponent('...'));
    // capture the URL-encoded payload, or skip this iteration if it is absent.
    if (preg_match("#eval\(decodeURIComponent\('(.*)'\)\)#", $rawcode, $matches) !== 1) {
        continue;
    }
    $rawcode2 = urldecode($matches[1]);

    // Extract the address from the mailto: link; the surrounding double quotes
    // may each be preceded by a backslash in the decoded payload.
    if (preg_match('#href=\\\\?"mailto:([^"\\\\]*)\\\\?"#', $rawcode2, $matches) !== 1) {
        continue;
    }
    $email = $matches[1];

    // Drop the node list once the address has been taken from the script.
    unset($entries);
} else {
    // Fallback source: an <a> carrying a URL-encoded data-email attribute,
    // again looked up under two alternative layouts.
    $entries = $xpath->query("//div[@class='avatared']/div[@class='details']/dl/dd/a[@data-email]");
    if (!isset($entries[0])) {
        $entries = $xpath->query("//ul[@class='vcard-details']/li[@class='vcard-detail']/a[@data-email]");
    }

    // If neither layout matched, $email is simply not assigned here.
    if (isset($entries[0])) {
        $email = urldecode($entries[0]->getAttribute('data-email'));
    }
}
Прототип программы, вытягивающей хэш аватарки и е-мейл из архивной копии профиля в «Гитхабе».
Nyancat 21.07.2021 08:00 # 0
#гитхаб #граватар #вебархив #xpath #data-mining #bigdata #хуй-ня
LinuxGovno 21.07.2021 10:12 # +4
rotoeb 21.07.2021 15:38 # +6
MAKAKA 18.08.2021 18:48 # 0
LinuxGovno 21.07.2021 20:21 # +1