<?php
// includes/TelegramScraper.php

/**
 * Scrapes the public web preview of a Telegram channel (https://t.me/s/<name>)
 * and extracts the channel title and the plain text of the posts found in the
 * returned HTML page.
 *
 * Only the single page returned by the preview URL is parsed; no pagination
 * is performed, so the number of messages returned can be lower than the
 * requested count.
 */
class TelegramScraper {
    /** Base URL of the public channel preview. */
    private $baseUrl = 'https://t.me/s/';

    /** User-Agent sent by both transports so they are treated alike. */
    private $userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

    /** Human-readable trace of the most recent scrapeChannel() call. */
    private $debug = [];

    /**
     * Scrape a channel and return up to $postCount message texts.
     *
     * @param string $username  Channel name, "@name", or a t.me / telegram.me URL.
     * @param int    $postCount Maximum number of messages to return (negative values are treated as 0).
     *
     * @return array Map with keys 'username', 'title', 'messages' and 'debug'.
     *
     * @throws InvalidArgumentException When no valid username remains after cleaning.
     * @throws Exception                When the channel page cannot be fetched.
     */
    public function scrapeChannel($username, $postCount = 50) {
        // Start every run with a fresh log so entries from a previous call
        // do not leak into this call's result.
        $this->debug = [];

        $username = $this->cleanUsername($username);

        // Without this guard an empty name would request the bare base URL
        // and scrape an unrelated page.
        if ($username === '') {
            $this->debug[] = "❌ Invalid channel username";
            throw new InvalidArgumentException("Invalid channel username");
        }

        $url = $this->baseUrl . $username;

        $this->debug[] = "🔍 Fetching: $url";

        $html = $this->fetch($url);

        if (!$html) {
            $this->debug[] = "❌ Cannot fetch channel";
            throw new Exception("Cannot fetch channel: $username");
        }

        $this->debug[] = "✅ HTML received: " . strlen($html) . " bytes";

        // Extract the messages from the page.
        $messages = $this->extractMessages($html, $postCount);
        $this->debug[] = "📝 Messages found: " . count($messages);

        return [
            'username' => $username,
            'title' => $this->extractTitle($html),
            'messages' => $messages,
            'debug' => $this->debug
        ];
    }

    /**
     * Normalise user input into a bare channel username.
     *
     * Strips an optional scheme/host prefix (t.me or telegram.me, with or
     * without the "s/" preview segment), a leading "@", anything following
     * the name (path, query string or fragment) and finally every character
     * other than ASCII letters, digits and underscore.
     *
     * @param mixed $username Raw user input.
     *
     * @return string Cleaned username; empty string when nothing usable remains.
     */
    private function cleanUsername($username) {
        if (!is_scalar($username)) {
            return '';
        }

        $username = trim((string) $username);
        $username = preg_replace('/^(https?:\/\/)?(t\.me|telegram\.me)\/(s\/)?/i', '', $username);
        $username = preg_replace('/^@/', '', $username);
        // Cut at the first path separator, query string or fragment so that
        // trailing parts are dropped instead of being merged into the name.
        $username = preg_replace('/[\?\/#].*$/', '', $username);
        $username = preg_replace('/[^a-zA-Z0-9_]/', '', $username);

        return (string) $username;
    }

    /**
     * Fetch a URL, trying cURL first and falling back to file_get_contents.
     *
     * @param string $url Absolute URL to download.
     *
     * @return string|null Response body, or null when every transport failed.
     */
    private function fetch($url) {
        // Method 1: cURL
        $html = $this->fetchWithCurl($url);
        if ($html) {
            return $html;
        }

        // Method 2: file_get_contents
        $html = $this->fetchWithFileGetContents($url);
        if ($html) {
            return $html;
        }

        return null;
    }

    /**
     * Download a URL with the cURL extension.
     *
     * @param string $url Absolute URL to download.
     *
     * @return string|null Body of a 200 response; null when cURL is missing,
     *                     the transfer failed, or the status is not 200.
     */
    private function fetchWithCurl($url) {
        if (!function_exists('curl_init')) {
            $this->debug[] = "⚠️ cURL not available";
            return null;
        }

        $ch = curl_init();

        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS => 5,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_CONNECTTIMEOUT => 10,
            // Keep certificate and host-name verification enabled; turning
            // them off lets an intermediary tamper with the scraped page.
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_USERAGENT => $this->userAgent,
            CURLOPT_HTTPHEADER => [
                'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language: en-US,en;q=0.5',
                'Connection: keep-alive',
                'Upgrade-Insecure-Requests: 1',
            ],
            // Sends the Accept-Encoding header and transparently decodes the
            // response, so the header is not repeated in CURLOPT_HTTPHEADER.
            CURLOPT_ENCODING => 'gzip, deflate',
        ]);

        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        $this->debug[] = "cURL HTTP Code: $httpCode";

        if ($error) {
            $this->debug[] = "cURL Error: $error";
            return null;
        }

        if ($httpCode !== 200) {
            $this->debug[] = "❌ HTTP Error: $httpCode";
            return null;
        }

        return $response;
    }

    /**
     * Download a URL with the HTTP stream wrapper (fallback transport).
     *
     * @param string $url Absolute URL to download.
     *
     * @return string|null Response body; null when the request failed or the
     *                     server answered with a status other than 200.
     */
    private function fetchWithFileGetContents($url) {
        $context = stream_context_create([
            'http' => [
                'method' => 'GET',
                'header' => [
                    'User-Agent: ' . $this->userAgent,
                    'Accept: text/html,application/xhtml+xml',
                ],
                'timeout' => 30,
                // Error bodies are still read so the status line can be
                // inspected and logged below.
                'ignore_errors' => true
            ],
            'ssl' => [
                'verify_peer' => true,
                'verify_peer_name' => true,
            ]
        ]);

        // Warnings are suppressed because failure is reported via the debug log.
        $html = @file_get_contents($url, false, $context);

        if ($html === false) {
            $this->debug[] = "⚠️ file_get_contents failed";
            return null;
        }

        // The HTTP wrapper fills $http_response_header in the local scope.
        // After redirects it holds several status lines; the last one wins.
        $status = 0;
        if (isset($http_response_header) && is_array($http_response_header)) {
            foreach ($http_response_header as $headerLine) {
                if (preg_match('#^HTTP/\S+\s+(\d{3})#', $headerLine, $statusMatch)) {
                    $status = (int) $statusMatch[1];
                }
            }
        }

        // Mirror the cURL path: an error page must not be parsed as a channel.
        if ($status !== 0 && $status !== 200) {
            $this->debug[] = "❌ HTTP Error: $status";
            return null;
        }

        return $html;
    }

    /**
     * Extract the channel title from the page.
     *
     * The patterns are tried in order and the first match is returned with
     * HTML entities decoded.
     *
     * @param string $html Channel preview HTML.
     *
     * @return string|null Title, or null when no pattern matched.
     */
    private function extractTitle($html) {
        $patterns = [
            '/<meta property="og:title" content="([^"]+)"/',
            '/<div class="tgme_channel_info_header_title[^"]*"[^>]*>([^<]+)</',
            '/<span dir="auto">([^<]+)<\/span>/'
        ];

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $html, $matches)) {
                return html_entity_decode(trim($matches[1]), ENT_QUOTES, 'UTF-8');
            }
        }

        return null;
    }

    /**
     * Extract message texts from the page.
     *
     * Several patterns are tried from most to least specific; the first one
     * that matches anything is used exclusively. Each match is converted to
     * plain text (<br> becomes a newline, tags are stripped, entities are
     * decoded) and kept only if it is longer than 10 bytes.
     *
     * @param string $html  Channel preview HTML.
     * @param int    $limit Maximum number of messages (negative values are treated as 0).
     *
     * @return array List of message strings in document order.
     */
    private function extractMessages($html, $limit = 50) {
        // A negative length would make array_slice() drop items from the end
        // instead of limiting the result.
        $limit = max(0, (int) $limit);
        $messages = [];

        // Different patterns for locating the message bodies.
        $patterns = [
            // Primary pattern
            '/<div class="tgme_widget_message_text[^"]*"[^>]*>(.*?)<\/div>\s*(?:<div class="tgme_widget_message_footer|<\/div>\s*<\/div>)/s',
            // Alternative pattern
            '/<div class="tgme_widget_message_text"[^>]*>(.*?)<\/div>/s',
            // Simplest pattern
            '/class="tgme_widget_message_text[^"]*"[^>]*>(.*?)<\/div>/s'
        ];

        foreach ($patterns as $pattern) {
            preg_match_all($pattern, $html, $matches);

            if (empty($matches[1])) {
                continue;
            }

            foreach ($matches[1] as $messageHtml) {
                // Convert <br> to newline
                $text = preg_replace('/<br\s*\/?>/i', "\n", $messageHtml);
                // Remove the remaining tags
                $text = strip_tags($text);
                // Decode HTML entities
                $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
                $text = trim($text);

                // strlen() counts bytes, so the threshold is 10 bytes, not
                // 10 characters, for multi-byte text.
                if (strlen($text) > 10) {
                    $messages[] = $text;
                }
            }

            // A pattern produced matches; do not try the looser ones.
            break;
        }

        // Cap the number of messages returned.
        return array_slice($messages, 0, $limit);
    }

    /**
     * Return the debug trace collected during the most recent scrapeChannel() call.
     *
     * @return array List of log lines.
     */
    public function getDebug() {
        return $this->debug;
    }
}