<?php
$page_title='Crawler سایت';
require __DIR__.'/_init.php';

/**
 * Collect every page URL listed in a sitemap, following nested sitemap indexes.
 *
 * The document at $url is read with file_get_contents() and parsed as XML.
 * Each <sitemap><loc> child is treated as a nested sitemap and fetched
 * recursively; each <url><loc> child contributes one URL to the result.
 * Unreadable, empty or malformed documents contribute nothing instead of
 * aborting the whole crawl.
 *
 * @param string $url     Location of the sitemap or sitemap index.
 * @param array  $visited Internal bookkeeping: sitemap locations already
 *                        processed (as keys), used to break reference cycles
 *                        between sitemap indexes. Callers normally omit it.
 * @return array List of distinct, non-empty URLs in discovery order.
 */
function fetch_urls_from_sitemap($url, array &$visited = []){
  $url=trim((string)$url);
  // An empty path makes file_get_contents() throw on PHP 8 (not silenced by @),
  // and an already-seen sitemap would otherwise recurse forever.
  if($url==='' || isset($visited[$url])) return [];
  $visited[$url]=true;

  $xml=@file_get_contents($url);
  if(!$xml) return [];

  // Keep libxml parse errors internal so malformed XML neither prints warnings
  // nor leaves stale errors behind; restore the caller's setting afterwards.
  $prev=libxml_use_internal_errors(true);
  $root=simplexml_load_string($xml);
  libxml_clear_errors();
  libxml_use_internal_errors($prev);
  if($root===false) return [];

  $urls=[];
  // Sitemap index entries: descend into each referenced sitemap.
  foreach($root->sitemap as $sm){
    $loc=trim((string)$sm->loc);
    if($loc==='') continue;
    foreach(fetch_urls_from_sitemap($loc,$visited) as $found){
      $urls[]=$found;
    }
  }
  // Plain URL entries of this sitemap.
  foreach($root->url as $u){
    $loc=trim((string)$u->loc);
    if($loc!=='') $urls[]=$loc;
  }
  // Re-index so the result is a gap-free list after de-duplication.
  return array_values(array_unique($urls));
}

// Crawl summary shown in the page; stays null unless the form was submitted.
$report=null;
if($_SERVER['REQUEST_METHOD']==='POST'){
  $sitemap=trim((string)setting_get($pdo,'sitemap_url',''));
  // Nothing configured means nothing to crawl; avoid handing an empty path to the fetcher.
  $urls=$sitemap==='' ? [] : fetch_urls_from_sitemap($sitemap);
  $new=$upd=$skip=0;

  // Prepare each statement once; only the bound values change per URL.
  $st=$pdo->prepare("SELECT content_hash FROM site_data WHERE url_hash=?");
  $insSt=$pdo->prepare("INSERT INTO site_data(url,url_hash,title,content,content_hash) VALUES(?,?,?,?,?)");
  $updSt=$pdo->prepare("UPDATE site_data SET title=?,content=?,content_hash=? WHERE url_hash=?");

  foreach($urls as $u){
    // Sitemap <loc> values are remote input: only fetch web URLs so that
    // file_get_contents() can never be pointed at local paths or other wrappers.
    if(!preg_match('#^https?://#i',$u)){$skip++; continue;}

    $html=@file_get_contents($u);
    if(!$html){$skip++; continue;}

    // Accept <title> with attributes and titles that span several lines.
    $title='';
    if(preg_match('/<title\b[^>]*>(.*?)<\/title>/is',$html,$m)){
      $title=trim(preg_replace('/\s+/',' ',strip_tags($m[1])));
    }
    // Page text with tags removed and whitespace collapsed.
    $content=trim(preg_replace('/\s+/',' ',strip_tags($html)));

    // url_hash is the lookup key; content_hash detects changed pages.
    $uh=md5($u); $ch=md5($content);
    $st->execute([$uh]);
    $row=$st->fetch();
    if(!$row){
      $insSt->execute([$u,$uh,$title,$content,$ch]);
      $new++;
    }elseif($row['content_hash']!==$ch){
      $updSt->execute([$title,$content,$ch,$uh]);
      $upd++;
    }else $skip++;
  }
  $report="جدید: $new | بروزرسانی: $upd | رد شده: $skip";
}

// Shared layout header; its contents are not visible here (presumably it uses $page_title — confirm).
require __DIR__.'/_layout_top.php';
?>
<div class="p-card">
<h3>اجرای کراولر</h3>
<form method="post">
<button class="p-btn p-btn--primary" type="submit">بروزرسانی دیتای سایت</button>
</form>
<?php /* $report is only set to a string inside the POST branch above, so the badge appears only after a crawl has run. */ if($report):?><div class="p-badge p-badge--ok" style="margin-top:10px"><?=$report?></div><?php endif;?>
</div>
<?php /* Shared layout footer (presumably closes the markup opened by _layout_top.php — confirm). */ require __DIR__.'/_layout_bottom.php'; ?>
