<?php

namespace App\Http\Controllers;

use App\Models\Site;
use App\Models\Links;
use App\Jobs\ScrapingJob;
use Illuminate\Http\Request;
use Illuminate\Support\Facades\DB;
use App\Http\Requests\ScrapingRequest;
use Symfony\Component\BrowserKit\HttpBrowser;

// use Symfony\Component\BrowserKit\HttpBrowser;

/**
 * Manages scraped sites: lists them, queues scraping jobs, shows the e-mail
 * addresses collected for a site, and exposes a synchronous debug crawl.
 */
class ScrapingController extends Controller
{
    /** User-Agent header sent with every outgoing crawl request. */
    private const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36';

    /**
     * List sites (15 per page) and flag each one that still has a queued job.
     *
     * A site is considered "in progress" when its domain appears in the
     * payload of any row of the `jobs` table.
     */
    public function index()
    {
        $sites = Site::paginate(15);

        foreach ($sites as $site) {
            // Escape regex metacharacters so the domain is matched literally:
            // an unescaped "." would match any character, and a stray
            // metacharacter could make the REGEXP expression invalid.
            // NOTE(review): REGEXP assumes a MySQL-compatible database - confirm.
            $pattern = preg_quote((string) $site->domain);

            $site->jobs = DB::table('jobs')
                ->whereRaw('payload REGEXP ?', [$pattern])
                ->exists();
        }

        return view('main_content.sites.add_domain', compact('sites'));
    }

    /**
     * Queue a scraping job for the submitted URL.
     *
     * Creates the Site record for the URL's host if it does not exist yet,
     * removes the links previously stored for it, and dispatches a
     * ScrapingJob before redirecting to the dashboard.
     */
    public function links(ScrapingRequest $request)
    {
        $url = $request->url;

        // parse_url() yields null/false for a missing host or a malformed URL;
        // indexing its array result directly would raise an undefined-index error.
        $host = parse_url((string) $url, PHP_URL_HOST);
        if (!is_string($host) || $host === '') {
            return back()
                ->withErrors(['url' => 'The URL must contain a valid host.'])
                ->withInput();
        }

        $pattern = $this->linkPattern($host);

        $domain = Site::firstOrCreate([
            'domain' => $host
        ]);

        // Start from a clean slate: the job repopulates the site's links.
        $domain->links()->delete();

        dispatch(new ScrapingJob($url, $domain, $pattern));

        return redirect()->route('dashboard');
    }

    /**
     * Show a single site together with its collected e-mail addresses.
     */
    public function view(Site $site)
    {
        // The model is already resolved by route-model binding; lazy-load the
        // relation instead of querying the same row a second time.
        $site->load('emails');

        return view('main_content.sites.view', compact('site'));
    }

    /**
     * Debug endpoint: synchronously crawl a hard-coded article two levels
     * deep, then extract and store e-mail addresses from every stored link.
     *
     * @return string Always 'done'.
     */
    public function test()
    {
        $url = "https://www.cairo24.com/1933396";

        $host = parse_url($url, PHP_URL_HOST);
        $pattern = $this->linkPattern($host);
        $domain = Site::firstOrCreate([
            'domain' => $host
        ]);

        $domain->links()->delete();

        // First level: links on the start page. Second level: links on each
        // of those pages. slugs() persists what it finds, so the return value
        // of the nested call is not needed here.
        foreach ($this->slugs($url, $domain, $pattern) as $tag) {
            $this->slugs($tag['url'], $domain, $pattern);
        }

        // Visit every stored link; email() persists the addresses it finds.
        foreach ($domain->links()->get() as $link) {
            logger($link->url);
            $this->email($link->url ?? $url, $domain);
        }

        return 'done';
    }

    /**
     * Build the regex that accepts absolute https URLs on the given host
     * with a non-empty path.
     */
    private function linkPattern(string $host): string
    {
        // preg_quote() escapes every regex metacharacter (and the "/"
        // delimiter), not just the dots.
        $quotedHost = preg_quote($host, '/');

        return "/^https:\/\/$quotedHost\/.+(\?.*)?$/";
    }

    /**
     * Fetch a page with the shared browser User-Agent.
     *
     * @return \Symfony\Component\DomCrawler\Crawler Crawler positioned on the response document.
     */
    private function fetch(string $url)
    {
        $browser = new HttpBrowser();
        $browser->setServerParameter('HTTP_USER_AGENT', self::USER_AGENT);

        return $browser->request('GET', $url);
    }

    /**
     * Collect the links on a page that match $pattern and store the new ones.
     *
     * A link is kept when it matches the pattern, differs from the page's own
     * URL, and is not already present in the links table. Duplicate hrefs on
     * the same page are stored once.
     *
     * @param string $url     Page to crawl.
     * @param Site   $domain  Site the discovered links are attached to.
     * @param string $pattern Regex an href must match to be kept.
     *
     * @return array<int, array{url: string}> The links that were newly stored.
     */
    private function slugs(string $url, Site $domain, string $pattern): array
    {
        $hrefs = $this->fetch($url)
            ->filter('body')
            ->filter('a')
            ->each(function ($node) {
                // attr() returns null for anchors without an href.
                return (string) $node->attr('href');
            });

        $tags = [];
        foreach (array_unique($hrefs) as $href) {
            if ($href === $url || preg_match($pattern, $href) !== 1) {
                continue;
            }

            if (Links::where('url', $href)->exists()) {
                continue;
            }

            $tags[] = ['url' => $href];
        }

        $domain->links()->createMany($tags);

        return $tags;
    }

    /**
     * Extract e-mail addresses from a page's text and store them for the site.
     *
     * @param string $url    Page to scan.
     * @param Site   $domain Site the addresses are attached to.
     *
     * @return string[] Every address matched on the page (may contain duplicates).
     */
    private function email(string $url, Site $domain): array
    {
        $body = $this->fetch($url)->filter('body');

        // text() throws on an empty node list (e.g. a non-HTML response).
        if ($body->count() === 0) {
            return [];
        }

        $container = $body->text();

        preg_match_all('/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/', $container, $matches);

        foreach ($matches[0] as $email) {
            $domain->emails()->firstOrCreate([
                'email' => $email,
            ]);
        }

        return $matches[0];
    }
}
