<?php

namespace App\Services;

use App\Models\SourceFile;
use App\Models\Voter;
use Carbon\Carbon;
use Illuminate\Support\Facades\File;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Str;
use Normalizer;
use Smalot\PdfParser\Parser;
use Symfony\Component\Process\Process;

class VoterPdfImportService
{
    /**
     * Import every voter found in the stored PDF of the given source file.
     *
     * The file is resolved through the Storage facade, its text is obtained
     * with getText() and handed to importText() with $updateSourceFile = true,
     * so the source file row is updated by that call on success.
     *
     * Any Throwable raised along the way is caught: the source file is marked
     * as failed (parsed_successfully = false, notes carry the message) and an
     * empty summary with an additional 'error_message' key is returned instead
     * of rethrowing.
     *
     * @param SourceFile  $sourceFile     Source file whose stored_path points at the PDF.
     * @param string|null $genderOverride Forwarded unchanged to importText().
     *
     * @return array Summary with the keys total_lines, number_of_voters_parsed,
     *               number_of_failed_blocks, failed_blocks and missing_fields;
     *               error_message is present only on failure.
     */
    public function import(SourceFile $sourceFile, ?string $genderOverride = null): array
    {
        try {
            $sourceFile->loadMissing('area');
            $filePath = Storage::path($sourceFile->stored_path);

            if (! file_exists($filePath)) {
                throw new \RuntimeException('ফাইল পাওয়া যায়নি: '.$filePath);
            }

            $text = $this->getText($filePath);

            return $this->importText($sourceFile, $text, $genderOverride, true, false);
        } catch (\Throwable $e) {
            $sourceFile->update([
                'parsed_successfully' => false,
                'parsed_at' => now(),
                'notes' => 'পার্সিং ব্যর্থ: '.$e->getMessage(),
            ]);

            // Keep the same shape as the summary built by importText() so that
            // callers can read 'missing_fields' without checking for failure first.
            return [
                'total_lines' => 0,
                'number_of_voters_parsed' => 0,
                'number_of_failed_blocks' => 0,
                'failed_blocks' => [],
                'missing_fields' => [
                    'father_name_bn' => 0,
                    'mother_name_bn' => 0,
                    'profession_bn' => 0,
                ],
                'error_message' => $e->getMessage(),
            ];
        }
    }

    /**
     * Import the voters of a single PDF page, layering several extraction passes.
     *
     * Every pass feeds its text to importText(), which creates or updates Voter
     * rows, and the per-pass summaries are combined with mergeSummaries(). The
     * first successful pass is the "base" pass; all later passes call
     * importText() with $mergeOnlyMissing = true.
     *
     * Pass order (each later pass is gated by a should*() check and/or an
     * environment flag, as shown in the body):
     *   1. PDF text layer (only when VM_USE_PDFTOTEXT is enabled).
     *   2. Default OCR of the page, when the text layer yielded no voters.
     *   3. Cross-merge of the other source (text layer <-> OCR).
     *   4. OCR with the alternative PSM values from getFallbackPsms().
     *   5. Grid OCR with the layouts from getFallbackGrids().
     *   6. Grid OCR without preprocessing, then with a fallback threshold.
     *   7. Full-page OCR (VM_OCR_MERGE_FULL_PAGE), plain and per fallback PSM.
     *   8. Grid OCR at the VM_OCR_DPI_FALLBACK resolution.
     *
     * Unlike import(), exceptions are not caught here.
     *
     * @param SourceFile  $sourceFile     Source file whose stored_path points at the PDF.
     * @param int         $page           Page number forwarded to the extraction helpers.
     * @param string|null $genderOverride Forwarded unchanged to importText().
     *
     * @return array Merged summary of all passes that ran.
     *
     * @throws \RuntimeException When the stored file does not exist on disk.
     */
    public function importPage(SourceFile $sourceFile, int $page, ?string $genderOverride = null): array
    {
        $sourceFile->loadMissing('area');
        $filePath = Storage::path($sourceFile->stored_path);

        if (! file_exists($filePath)) {
            throw new \RuntimeException('ফাইল পাওয়া যায়নি: '.$filePath);
        }

        // Pass 1: optional embedded text layer.
        // NOTE(review): this flag is read with env() while the other boolean flags
        // below use envBool() — confirm env() already coerces strings like "false".
        $textLayer = '';
        if ($this->env('VM_USE_PDFTOTEXT', false)) {
            $textLayer = $this->pdfToTextPage($filePath, $page);
        }
        $summary = null;
        $baseSource = 'ocr';

        if ($this->looksLikeVoterText($textLayer)) {
            $summary = $this->importText($sourceFile, $textLayer, $genderOverride, false, false);
            $baseSource = 'text';
        }

        // Pass 2: fall back to OCR as the base when the text layer was skipped
        // or produced no voters at all.
        if (! $summary || $summary['number_of_voters_parsed'] === 0) {
            $text = $this->ocrPageText($filePath, $page);
            $summary = $this->importText($sourceFile, $text, $genderOverride, false, false);
            $baseSource = 'ocr';
        }

        // Pass 3a: OCR was the base but a plausible text layer exists — use it
        // only to fill fields that are still missing.
        if ($baseSource === 'ocr' && $this->shouldRunFallbackOcr($summary) && $this->looksLikeVoterText($textLayer)) {
            $textSummary = $this->importText($sourceFile, $textLayer, $genderOverride, false, true);
            $summary = $this->mergeSummaries($summary, $textSummary);
        }

        // Pass 3b: the text layer was the base — always run OCR once to fill gaps.
        if ($baseSource === 'text') {
            $ocrText = $this->ocrPageText($filePath, $page);
            $ocrSummary = $this->importText($sourceFile, $ocrText, $genderOverride, false, true);
            $summary = $this->mergeSummaries($summary, $ocrSummary);
        }

        // Pass 4: retry OCR with each fallback PSM until the summary no longer
        // asks for a fallback.
        if ($this->shouldRunFallbackOcr($summary)) {
            foreach ($this->getFallbackPsms() as $psm) {
                $fallbackText = $this->ocrPageText($filePath, $page, $psm);
                $fallbackSummary = $this->importText($sourceFile, $fallbackText, $genderOverride, false, true);
                $summary = $this->mergeSummaries($summary, $fallbackSummary);
                if (! $this->shouldRunFallbackOcr($summary)) {
                    break;
                }
            }
        }

        // Pass 5: grid-based OCR with each configured columns x rows layout.
        if ($this->shouldUseBoxOcr() && $this->shouldRunGridFallback($summary)) {
            foreach ($this->getFallbackGrids() as [$cols, $rows]) {
                $fallbackText = $this->ocrPageTextByGrid($filePath, $page, null, $cols, $rows);
                $fallbackSummary = $this->importText($sourceFile, $fallbackText, $genderOverride, false, true);
                $summary = $this->mergeSummaries($summary, $fallbackSummary);
                if (! $this->shouldRunGridFallback($summary)) {
                    break;
                }
            }
        }

        // Pass 6: when preprocessing is enabled, also try grid OCR with the
        // sixth argument set to false, then with an explicit fallback threshold.
        // NOTE(review): the meaning of those positional arguments is defined by
        // ocrPageTextByGrid(), which is outside this view.
        if ($this->envBool('VM_OCR_PREPROCESS', true) && $this->shouldRunFallbackOcr($summary)) {
            $rawGridText = $this->ocrPageTextByGrid($filePath, $page, null, null, null, false);
            $rawGridSummary = $this->importText($sourceFile, $rawGridText, $genderOverride, false, true);
            $summary = $this->mergeSummaries($summary, $rawGridSummary);

            $fallbackThreshold = $this->env('VM_OCR_PREPROCESS_THRESHOLD_FALLBACK');
            if ($fallbackThreshold !== null && $this->shouldRunFallbackOcr($summary)) {
                $fallbackThreshold = (int) $fallbackThreshold;
                $thresholdText = $this->ocrPageTextByGrid($filePath, $page, null, null, null, true, $fallbackThreshold);
                $thresholdSummary = $this->importText($sourceFile, $thresholdText, $genderOverride, false, true);
                $summary = $this->mergeSummaries($summary, $thresholdSummary);
            }
        }

        // Pass 7a: unconditional full-page OCR merge when the flag is on.
        if ($this->envBool('VM_OCR_MERGE_FULL_PAGE', false)) {
            $fullText = $this->ocrPageTextSimple($filePath, $page);
            $fullSummary = $this->importText($sourceFile, $fullText, $genderOverride, false, true);
            $summary = $this->mergeSummaries($summary, $fullSummary);
        }

        // Pass 7b: same flag, but repeated per fallback PSM while gaps remain.
        if ($this->envBool('VM_OCR_MERGE_FULL_PAGE', false) && $this->shouldRunFallbackOcr($summary)) {
            foreach ($this->getFallbackPsms() as $psm) {
                $fullText = $this->ocrPageTextSimple($filePath, $page, $psm);
                $fullSummary = $this->importText($sourceFile, $fullText, $genderOverride, false, true);
                $summary = $this->mergeSummaries($summary, $fullSummary);
                if (! $this->shouldRunFallbackOcr($summary)) {
                    break;
                }
            }
        }

        // Pass 8: one more grid OCR at an alternative DPI, only when that DPI is
        // configured (> 0) and differs from the base DPI.
        $dpiFallback = (int) $this->env('VM_OCR_DPI_FALLBACK', 0);
        $baseDpi = (int) $this->env('VM_OCR_DPI', 300);
        if ($dpiFallback > 0 && $dpiFallback !== $baseDpi && $this->shouldRunFallbackOcr($summary)) {
            $dpiText = $this->ocrPageTextByGrid($filePath, $page, null, null, null, null, null, $dpiFallback);
            $dpiSummary = $this->importText($sourceFile, $dpiText, $genderOverride, false, true);
            $summary = $this->mergeSummaries($summary, $dpiSummary);
        }

        return $summary;
    }

    /**
     * Split raw text into voter blocks, parse each block and persist the result.
     *
     * For every block that parses into a record with a voter number and a name,
     * an existing Voter is updated (via buildMergeData) or a new one is created.
     * Blocks that fail to parse are only reported in 'failed_blocks' when they
     * contain the voter-number label.
     *
     * @param SourceFile  $sourceFile       File the text was extracted from.
     * @param string      $text             Raw extracted text.
     * @param string|null $genderOverride   Forwarded to parseBlock() / mapGenderFromArea().
     * @param bool        $updateSourceFile When true, the source file status and the
     *                                      area's total_voters are refreshed afterwards.
     * @param bool        $mergeOnlyMissing Forwarded to buildMergeData().
     *
     * @return array Summary: total_lines, number_of_voters_parsed,
     *               number_of_failed_blocks, failed_blocks, missing_fields.
     */
    private function importText(SourceFile $sourceFile, string $text, ?string $genderOverride, bool $updateSourceFile, bool $mergeOnlyMissing): array
    {
        $chunks = $this->splitBlocks($text);

        $summary = [
            'total_lines' => count($chunks),
            'number_of_voters_parsed' => 0,
            'number_of_failed_blocks' => 0,
            'failed_blocks' => [],
            'missing_fields' => [
                'father_name_bn' => 0,
                'mother_name_bn' => 0,
                'profession_bn' => 0,
            ],
        ];

        foreach ($chunks as $chunk) {
            $record = $this->parseBlock($chunk, $sourceFile, $genderOverride);
            $usable = $record && ! empty($record['voter_no']) && ! empty($record['name_bn']);

            if (! $usable) {
                // Only blocks that visibly carry a voter-number label count as failures.
                if (preg_match('/ভোটার\s*নং/iu', $chunk) === 1) {
                    $summary['failed_blocks'][] = trim($chunk);
                }

                continue;
            }

            $summary = $this->trackMissingFields($summary, $record);
            $match = $this->findExistingVoter($sourceFile, $record);

            if ($match === null) {
                $attributes = array_merge($record, [
                    'area_id' => $sourceFile->area_id,
                    'source_file_id' => $sourceFile->id,
                    'gender' => $record['gender'] ?? $this->mapGenderFromArea($sourceFile, $genderOverride),
                    'extra_json' => ['raw_block' => trim($chunk)],
                ]);
                Voter::create($attributes);
            } else {
                $changes = $this->buildMergeData($match, $record, $mergeOnlyMissing);

                if ($this->shouldUpdateVoterNo($match->voter_no, $record['voter_no'] ?? null)) {
                    $changes['voter_no'] = $record['voter_no'];
                }
                if (! $match->source_file_id) {
                    $changes['source_file_id'] = $sourceFile->id;
                }
                if ($changes !== []) {
                    $match->update($changes);
                }
            }

            $this->cleanupInvalidSerialDuplicates(
                $sourceFile,
                $record['serial_no_pdf_bn'] ?? null,
                $record['voter_no'] ?? null
            );

            $summary['number_of_voters_parsed']++;
        }

        $failedCount = count($summary['failed_blocks']);
        $summary['number_of_failed_blocks'] = $failedCount;

        if (! $updateSourceFile) {
            return $summary;
        }

        $sourceFile->update([
            'parsed_successfully' => $failedCount === 0 && $summary['number_of_voters_parsed'] > 0,
            'parsed_at' => now(),
            'notes' => $failedCount > 0 ? 'কিছু ব্লক ইম্পোর্ট ব্যর্থ হয়েছে' : 'ইম্পোর্ট সম্পন্ন',
        ]);

        $area = $sourceFile->area;
        if ($area) {
            $area->update(['total_voters' => $sourceFile->area->voters()->count()]);
        }

        return $summary;
    }

    /**
     * Split extracted text into one string per voter entry.
     *
     * Strategies, in order:
     *   1. If the text contains the literal marker <<<VM_BOX>>>, split on it and
     *      return immediately.
     *   2. Otherwise normalise line endings and split in front of every
     *      "<serial>. নাম:" marker.
     *   3. Prefer the result of splitColumnBlocks() when it found at least 10
     *      blocks and at least as many as strategy 2.
     *   4. Prefer a split on bare serial markers (digits followed by "." or the
     *      danda U+0964) when it yields more blocks.
     *   5. Prefer a split on the voter-number label when it yields more blocks.
     *
     * @param string $text Raw extracted text.
     *
     * @return array List of trimmed, non-empty block strings.
     */
    private function splitBlocks(string $text): array
    {
        if ($text === '') {
            return [];
        }

        // Pre-segmented input: the marker separates the blocks directly.
        if (str_contains($text, '<<<VM_BOX>>>')) {
            $parts = array_filter(array_map('trim', explode('<<<VM_BOX>>>', $text)));

            return array_values($parts);
        }

        $normalized = preg_replace('/\r\n?/', "\n", $text);
        // Column detection runs on the text before any newlines are injected below.
        $columnBlocks = $this->splitColumnBlocks($normalized);
        // Force every "<serial>. নাম:" marker onto its own line.
        $normalized = preg_replace('/\s*([০-৯0-9]{1,4}\s*\.?\s*নাম[:：])/u', "\n$1", $normalized);
        // Also break before a bare serial marker ("." or danda) when "নাম" is missing.
        $normalized = preg_replace('/\s*([০-৯0-9]{1,4}\s*(?:\.|\\x{0964}))\s*/u', "\n$1 ", $normalized);

        // Zero-width split: each part starts at a line that begins with a serial + "নাম:".
        $parts = preg_split('/(?=^\s*[০-৯0-9]{1,4}\s*\.?\s*নাম[:：])/mu', $normalized);
        $blocks = [];
        foreach ($parts as $part) {
            $trimmed = trim($part);
            if ($trimmed !== '') {
                $blocks[] = $trimmed;
            }
        }

        if (! empty($columnBlocks)) {
            $columnCount = count($columnBlocks);
            if ($columnCount >= 10 && $columnCount >= count($blocks)) {
                $blocks = $columnBlocks;
            }
        }

        // NOTE(review): 1200 and 960 are hard-coded limits; their rationale is not
        // visible in this file section.
        if (count($blocks) < 1200) {
            if (preg_match_all('/(^\s*[০-৯0-9]{1,4}\s*(?:\.|\\x{0964}).*?)(?=^\s*[০-৯0-9]{1,4}\s*(?:\.|\\x{0964})|\z)/msu', $normalized, $m)) {
                $serialBlocks = array_values(array_filter(array_map('trim', $m[1])));
                if (count($serialBlocks) > count($blocks)) {
                    $blocks = $serialBlocks;
                }
            }
        }

        if (count($blocks) < 960) {
            // Last resort: one block per voter-number label occurrence.
            if (preg_match_all('/(ভোটার\s*নং[:：]?\s*[০-৯0-9]+.*?)(?=ভোটার\s*নং[:：]?\s*[০-৯0-9]+|\z)/su', $normalized, $m)) {
                $fallback = array_values(array_filter(array_map('trim', $m[1])));
                if (count($fallback) > count($blocks)) {
                    $blocks = $fallback;
                }
            }
        }

        return $blocks;
    }

    /**
     * Rebuild voter blocks from text whose lines hold several columns side by side.
     *
     * Lines are grouped into "rows": a row starts at a line containing at least
     * one "<serial>. নাম:" marker and ends at the next such line or right after
     * a line containing the address label. Each row is de-interleaved with
     * buildRowBlocks().
     *
     * @param string $text Text with "\n" line endings.
     *
     * @return array Blocks, or an empty array when no line carried more than one
     *               serial marker (i.e. the text does not look multi-column).
     */
    private function splitColumnBlocks(string $text): array
    {
        $collected = [];
        $pendingRow = [];
        $sawSeveralSerialsOnOneLine = false;

        // Converts the pending row (if any) into blocks and starts a fresh row.
        $flushRow = function () use (&$collected, &$pendingRow): void {
            if ($pendingRow === []) {
                return;
            }
            $collected = array_merge($collected, $this->buildRowBlocks($pendingRow));
            $pendingRow = [];
        };

        foreach (preg_split('/\n+/', $text) as $rawLine) {
            $current = trim($rawLine);
            if ($current === '') {
                continue;
            }

            $serialCount = preg_match_all('/[০-৯0-9]{1,4}\s*\.?\s*নাম[:：]/u', $current);

            if ($serialCount) {
                // A new serial line closes whatever row was being collected.
                $flushRow();
                $sawSeveralSerialsOnOneLine = $sawSeveralSerialsOnOneLine || $serialCount > 1;
                $pendingRow[] = $current;
            } elseif ($pendingRow !== []) {
                $pendingRow[] = $current;
                // The address line is the last line of an entry.
                if (preg_match('/ঠিকানা[:：]/u', $current)) {
                    $flushRow();
                }
            }
        }

        $flushRow();

        return $sawSeveralSerialsOnOneLine
            ? array_values(array_filter(array_map('trim', $collected)))
            : [];
    }

    /**
     * Turn the lines of one multi-column row into one block per column.
     *
     * Every line is split with splitLineIntoColumns(); the n-th cells of all
     * lines are then joined with "\n" to form the n-th block. Empty cells are
     * skipped and a column without any content yields no block.
     *
     * @param array $rowLines Lines belonging to the same row.
     *
     * @return array One newline-joined block per non-empty column.
     */
    private function buildRowBlocks(array $rowLines): array
    {
        $grid = [];
        foreach ($rowLines as $rowLine) {
            $grid[] = $this->splitLineIntoColumns($rowLine);
        }

        $width = array_reduce(
            $grid,
            static fn ($widest, $cells) => max($widest, count($cells)),
            0
        );

        $result = [];
        for ($column = 0; $column < $width; $column++) {
            $stack = [];
            foreach ($grid as $cells) {
                $cell = trim($cells[$column] ?? '');
                if ($cell !== '') {
                    $stack[] = $cell;
                }
            }

            if ($stack === []) {
                continue;
            }
            $result[] = implode("\n", $stack);
        }

        return $result;
    }

    /**
     * Split a line into columns on runs of two or more whitespace characters.
     *
     * Each column is trimmed and empty columns are removed. A column consisting
     * of the text "0" is kept: it is real content, and dropping it would shift
     * every following column one position to the left.
     *
     * @param string $line A single line of text.
     *
     * @return array List of non-empty, trimmed column strings.
     */
    private function splitLineIntoColumns(string $line): array
    {
        $columns = preg_split('/\s{2,}/u', $line);
        if ($columns === false) {
            // The pattern could not be applied (e.g. malformed UTF-8): keep the
            // line as a single column instead of losing it.
            $columns = [$line];
        }

        $columns = array_map('trim', $columns);

        // Explicit comparison: array_filter() without a callback would also
        // discard the string "0".
        return array_values(array_filter(
            $columns,
            static fn (string $column): bool => $column !== ''
        ));
    }

    /**
     * Parse one voter block into an attribute array.
     *
     * Fields are extracted by label first; several fallbacks (first line, line
     * order, bare digit runs, dates anywhere in the block) are tried when a
     * label is missing. The block is rejected when no valid voter number or no
     * name can be determined.
     *
     * @param string      $block          Text of a single voter entry.
     * @param SourceFile  $sourceFile     Used to derive the gender.
     * @param string|null $genderOverride Forwarded to mapGenderFromArea().
     *
     * @return array|null Attributes (serial_no, serial_no_pdf_bn, voter_no, name_bn,
     *                    father_name_bn, mother_name_bn, profession_bn, date_of_birth,
     *                    date_of_birth_bn, address_bn, name_en, gender), or null when
     *                    the block has no usable voter number or name.
     */
    private function parseBlock(string $block, SourceFile $sourceFile, ?string $genderOverride = null): ?array
    {
        $block = $this->normalizeBlock($block);

        $serial = $this->extract('/^\s*([০-৯0-9]+)\s*\.?/mu', $block, 1);
        $name = $this->extract('/নাম[:：ঃ]?\s*(.+?)(?=\s+ভোটার\s*নং|$)/u', $block);
        if (! $name) {
            // Fallback 1: whatever follows the serial (and optional label) on the first line.
            $firstLine = trim(strtok($block, "\n"));
            if ($firstLine && preg_match('/^\s*[০-৯0-9]+\s*\.?\s*(?:নাম[:：ঃ]?\s*)?(.+)/u', $firstLine, $m)) {
                $name = trim($m[1]);
            }
        }
        if (! $name) {
            // Fallback 2: the serial stands alone on line 1, so take line 2 unless it is a label.
            $lines = array_values(array_filter(array_map('trim', preg_split('/\n+/', $block))));
            if (count($lines) > 1 && preg_match('/^\s*[০-৯0-9]+\s*\.?$/u', $lines[0])) {
                $candidate = trim($lines[1]);
                if ($candidate !== '' && ! $this->lineStartsWithLabel($candidate)) {
                    $name = $candidate;
                }
            }
        }
        $voterNo = $this->extract('/ভোটার\s*নং[:：ঃ]?\s*([০-৯0-9]+)/u', $block);
        if (! $voterNo && preg_match_all('/(?:[০-৯0-9][\s\-]?){10,}/u', $block, $m)) {
            // No labelled number: take the longest digit run of 10-17 digits.
            // Lengths are measured in characters, not bytes, because a Bangla
            // digit occupies three bytes in UTF-8 and would otherwise never
            // satisfy the 10-17 range.
            $candidates = array_map(fn ($match) => preg_replace('/[^০-৯0-9]/u', '', $match), $m[0]);
            $candidates = array_values(array_filter(
                $candidates,
                fn ($value) => mb_strlen($value) >= 10 && mb_strlen($value) <= 17
            ));
            if (! empty($candidates)) {
                usort($candidates, fn ($a, $b) => mb_strlen($b) <=> mb_strlen($a));
                $voterNo = $candidates[0];
            }
        }
        $father = $this->extract('/পিতা[:：ঃ]?\s*(.+?)(?=\s+মাতা[:：ঃ]|\s+পিতা[:：ঃ]|$)/u', $block);
        $mother = $this->extract('/মাতা[:：ঃ]?\s*(.+?)(?=\s+পেশা[:：ঃ]|\s+মাতা[:：ঃ]|$)/u', $block);
        $address = $this->extract('/ঠিকানা[:：ঃ]?\s*(.+)/u', $block);
        $parentFallback = $this->extractParentsFromLines($block);
        $lineFallback = $this->inferFieldsFromLineOrder($block);
        // Precedence: labelled match, then line-by-line scan, then line-order guess.
        $father = $father ?: $parentFallback['father'] ?: $lineFallback['father'];
        $mother = $mother ?: $parentFallback['mother'] ?: $lineFallback['mother'];

        $profession = null;
        $professionData = $this->extractProfessionAndDob($block);
        $professionLine = $professionData['line'] ?: $lineFallback['profession_line'];
        $dobString = $professionData['dob'] ?: $lineFallback['dob'];

        if ($professionLine) {
            // Primary: split on the date-of-birth label inside the profession line.
            if (preg_match('/জন(?:্ম)?\s*তারিখ[:：ঃ\\s]*([০-৯0-9\/\-]+)/u', $professionLine, $mDob)) {
                $dobString = $dobString ?: ($mDob[1] ?? null);
                $profession = $this->trimBanglaPunctuation(preg_split('/জন(?:্ম)?\s*তারিখ[:：ঃ\\s]*/u', $professionLine)[0] ?? $professionLine);
            } elseif (preg_match('/পেশা[:：ঃ]?\s*([^,]+)(?:[,，]?\s*জন(?:্ম)?\s*তারিখ[:：ঃ\\s]*([০-৯0-9\/\-]+))?/u', $professionLine, $match)) {
                $profession = trim($match[1]);
                $dobString = $dobString ?: ($match[2] ?? null);
            } else {
                $profession = trim($professionLine);
            }
        }
        if ($profession) {
            // Remove any trailing date-of-birth fragment that survived the split.
            $profession = preg_replace('/,?\s*জন্ম[ম]?\s*তারি?খ.*$/u', '', $profession);
            $profession = $this->trimBanglaPunctuation($profession);
        }

        if (! $dobString) {
            $dobString = $this->extract('/জন(?:্ম)?\s*তারিখ[:：ঃ\\s]*([০-৯0-9\/\-]+)/u', $block);
        }

        $dateOfBirth = $this->parseDate($dobString);
        $dateOfBirthBn = $dobString ? preg_replace('/[^০-৯\\/\\-]/u', '', $dobString) : null;

        // Fallback: if the date could not be parsed, look for a Bangla date anywhere in the block.
        if (! $dateOfBirth) {
            if (preg_match('/([০-৯]{2}\\/[০-৯]{2}\\/[০-৯]{4})/u', $block, $m)) {
                $dateOfBirthBn = $m[1];
                $dateOfBirth = $this->parseDate($dateOfBirthBn);
            } elseif (preg_match('/([০-৯]{2}\\-[০-৯]{2}\\-[০-৯]{4})/u', $block, $m)) {
                $dateOfBirthBn = str_replace('-', '/', $m[1]);
                $dateOfBirth = $this->parseDate($dateOfBirthBn);
            }
        }
        $address = $address ?: $lineFallback['address'];

        // Keep only digits (ASCII or Bangla U+09E6..U+09EF), then normalise them.
        $voterNo = $voterNo ? preg_replace('/[^0-9\x{09E6}-\x{09EF}]/u', '', $voterNo) : '';
        $voterNo = $voterNo ? $this->normalizeBnDigits($voterNo) : '';
        if (! $this->isValidVoterNo($voterNo)) {
            $voterNo = '';
        }
        $serialNoBn = $serial ? preg_replace('/[^0-9\x{09E6}-\x{09EF}]/u', '', $serial) : null;
        $serialNoBn = $serialNoBn ? $this->normalizeBnDigits($serialNoBn) : null;
        $serialNo = $serialNoBn ? (int) $this->bnToEnDigits($serialNoBn) : null;
        // Serials outside the configured range are discarded.
        $serialMin = (int) $this->env('VM_OCR_SERIAL_MIN', 1);
        $serialMax = (int) $this->env('VM_OCR_SERIAL_MAX', 9999);
        if ($serialNo !== null && ($serialNo < $serialMin || $serialNo > $serialMax)) {
            $serialNo = null;
            $serialNoBn = null;
        }

        // A "name" made only of punctuation and spaces is not a name.
        if ($name && preg_match('/^[\p{P}\p{Zs}]+$/u', $name)) {
            $name = null;
        }
        if (! $voterNo || ! $name) {
            return null;
        }

        $fatherValue = $this->sanitizePersonName($this->sanitizeLabelValue($father, 'পিতা'));
        // The mother's name is only kept when the block actually shows a mother label.
        $motherLabelPresent = (bool) preg_match('/(মাতা|মতা)\s*[:ঃ]/u', $block);
        $motherValue = $motherLabelPresent
            ? $this->sanitizePersonName($this->sanitizeLabelValue($mother, 'মাতা'))
            : null;

        return [
            'serial_no' => $serialNo,
            'serial_no_pdf_bn' => $serialNoBn,
            'voter_no' => $voterNo,
            'name_bn' => $this->cleanBangla($this->stripLatinNoise($name)),
            'father_name_bn' => $this->cleanBangla($this->stripLatinNoise($fatherValue)),
            'mother_name_bn' => $this->cleanBangla($this->stripLatinNoise($motherValue)),
            'profession_bn' => $this->cleanBangla($this->stripLatinNoise($this->sanitizeLabelValue($profession, 'পেশা'))),
            'date_of_birth' => $dateOfBirth,
            'date_of_birth_bn' => $dateOfBirthBn,
            'address_bn' => $this->cleanBangla($address),
            'name_en' => null,
            'gender' => $this->mapGenderFromArea($sourceFile, $genderOverride),
        ];
    }

    /**
     * Scan a block line by line for the father's and mother's names.
     *
     * Handles three layouts: "label: value" on one line, a label alone on a
     * line with the value on the next line, and an unlabeled name line that
     * directly follows the voter-number line (taken as the father).
     *
     * State carried between lines:
     *   - $currentLabel: a parent label was seen without a value; the next
     *     free line becomes its value.
     *   - $seenVoterNo: the previous relevant line was the voter-number line.
     *   - $maybeMotherAfterFather: a father value was just stored.
     *
     * @param string $block Text of a single voter entry.
     *
     * @return array ['father' => string|null, 'mother' => string|null]
     */
    private function extractParentsFromLines(string $block): array
    {
        $lines = preg_split('/\n+/', $block);
        $parents = ['father' => null, 'mother' => null];
        $currentLabel = null;
        $maybeMotherAfterFather = false;
        $seenVoterNo = false;

        foreach ($lines as $line) {
            $line = trim($line);
            if ($line === '') {
                continue;
            }

            // Voter-number line by label; the second pattern matches the label
            // with its first vowel sign missing.
            if (preg_match('/^ভোটার\s*নং[:：ঃ]?/u', $line) || preg_match('/^ভাটার\s*নং[:：ঃ]?/u', $line)) {
                $seenVoterNo = true;
                $currentLabel = null;
                $maybeMotherAfterFather = false;
                continue;
            }

            // Voter-number line by content (a long digit run), label or not.
            if ($this->lineHasVoterNo($line)) {
                $seenVoterNo = true;
                $currentLabel = null;
                $maybeMotherAfterFather = false;
                continue;
            }

            // Explicit parent label, with or without a value on the same line.
            if (preg_match('/^(পিতা|মাতা)\s*[:：ঃ]?\s*(.*)$/u', $line, $match)) {
                $labelKey = $match[1] === 'পিতা' ? 'father' : 'mother';
                $value = trim($match[2] ?? '');
                if ($value !== '') {
                    $parents[$labelKey] = $value;
                    $currentLabel = null;
                    $maybeMotherAfterFather = ($labelKey === 'father');
                    $seenVoterNo = false;
                } else {
                    // Value expected on the following line.
                    $currentLabel = $labelKey;
                    $maybeMotherAfterFather = false;
                    $seenVoterNo = false;
                }
                continue;
            }

            // Any other known label ends whatever was pending.
            if (preg_match('/^(নাম|ভোটার\s*নং|পেশা|ঠিকানা|জন্ম\s*তারিখ)\s*[:：ঃ]?/u', $line)) {
                $currentLabel = null;
                $maybeMotherAfterFather = false;
                $seenVoterNo = false;
                continue;
            }

            // Unlabeled name right after the voter number is taken as the father.
            if ($seenVoterNo && ! $parents['father']) {
                if ($this->looksLikeNameLine($line)) {
                    $parents['father'] = $line;
                    $seenVoterNo = false;
                    $maybeMotherAfterFather = true;
                    continue;
                }
            }

            // Value line for a label that stood alone on the previous line.
            if ($currentLabel) {
                $parents[$currentLabel] = $line;
                $maybeMotherAfterFather = ($currentLabel === 'father');
                $currentLabel = null;
                $seenVoterNo = false;
                continue;
            }

            // NOTE(review): this branch only resets the flags; the line is not
            // stored as the mother's name — confirm that this is intended.
            if ($maybeMotherAfterFather && ! $parents['mother']) {
                $maybeMotherAfterFather = false;
                $seenVoterNo = false;
                continue;
            }

            $maybeMotherAfterFather = false;
        }

        return $parents;
    }

    /**
     * Guess fields from the position of lines relative to the voter-number line.
     *
     * The line after the voter number is tried as the father's name, the one
     * after that as the mother's name (only when it carries the mother label).
     * The remaining lines are scanned for the first address line and the first
     * profession / date-of-birth line. If no address was found, the last line
     * is used when it is unlabeled and looks like an address.
     *
     * @param string $block Text of a single voter entry.
     *
     * @return array Keys father, mother, profession_line, dob, address; each
     *               value is a string or null.
     */
    private function inferFieldsFromLineOrder(string $block): array
    {
        $inferred = [
            'father' => null,
            'mother' => null,
            'profession_line' => null,
            'dob' => null,
            'address' => null,
        ];

        $rows = array_values(array_filter(array_map('trim', preg_split('/\n+/', $block))));
        if ($rows === []) {
            return $inferred;
        }

        // Locate the first line that is, or contains, the voter number.
        $anchor = null;
        foreach ($rows as $position => $row) {
            $isVoterRow = preg_match('/^ভোটার\s*নং[:：ঃ]?/u', $row)
                || preg_match('/^ভাটার\s*নং[:：ঃ]?/u', $row)
                || $this->lineHasVoterNo($row);

            if ($isVoterRow) {
                $anchor = $position;
                break;
            }
        }

        if ($anchor === null) {
            return $inferred;
        }

        $next = $anchor + 1;

        if (isset($rows[$next])) {
            $fatherGuess = $this->stripLeadingLabel($rows[$next], 'পিতা');
            if ($this->looksLikeNameLine($fatherGuess)) {
                $inferred['father'] = $fatherGuess;
                $next++;
            }
        }

        if (isset($rows[$next]) && preg_match('/^মাতা\s*[:：ঃ]?/u', $rows[$next])) {
            $motherGuess = $this->stripLeadingLabel($rows[$next], 'মাতা');
            if ($this->looksLikeNameLine($motherGuess)) {
                $inferred['mother'] = $motherGuess;
                $next++;
            }
        }

        foreach (array_slice($rows, $next) as $row) {
            if ($inferred['address'] === null && preg_match('/ঠিকানা[:：ঃ]?/u', $row)) {
                $inferred['address'] = $this->stripLeadingLabel($row, 'ঠিকানা');
            } elseif ($inferred['profession_line'] === null && $this->looksLikeProfessionLine($row)) {
                $inferred['profession_line'] = $this->stripLeadingLabel($row, 'পেশা');
                if (preg_match('/জন্ম(?:\s*তারিখ)?[:：ঃ\s]*([০-৯0-9\/\-]+)/u', $row, $dobMatch)) {
                    $inferred['dob'] = $dobMatch[1] ?? null;
                }
            }
        }

        if ($inferred['address'] !== null) {
            return $inferred;
        }

        // No labelled address: accept the final line if it looks like one.
        $tail = $rows[array_key_last($rows)] ?? null;
        if ($tail && ! $this->lineStartsWithLabel($tail) && $this->looksLikeAddressLine($tail)) {
            $inferred['address'] = $tail;
        }

        return $inferred;
    }

    /**
     * Remove a leading label (plus optional colon/visarga separator) from a line.
     *
     * @param string $line  Line that may start with the label.
     * @param string $label Label text; it is regex-quoted before use.
     *
     * @return string The trimmed remainder, or the trimmed line when the label is absent.
     */
    private function stripLeadingLabel(string $line, string $label): string
    {
        $clean = trim($line);

        if ($clean !== '') {
            $pattern = '/^'.preg_quote($label, '/').'\s*[:：ঃ]?\s*/u';
            $clean = trim(preg_replace($pattern, '', $clean));
        }

        return $clean;
    }

    /**
     * Tell whether a line mentions the profession label or the birth(-date) label.
     *
     * @param string $line Line to inspect.
     *
     * @return bool True when one of the labels occurs anywhere in the line.
     */
    private function looksLikeProfessionLine(string $line): bool
    {
        $hits = preg_match('/পেশা|জন্ম\s*তারিখ|জন্ম/u', $line);

        return $hits === 1;
    }

    /**
     * Tell whether a line looks like an address.
     *
     * A line qualifies when it contains the address label or at least two
     * ASCII commas.
     *
     * @param string $line Line to inspect.
     *
     * @return bool
     */
    private function looksLikeAddressLine(string $line): bool
    {
        $hasLabel = (bool) preg_match('/ঠিকানা/u', $line);

        return $hasLabel || substr_count($line, ',') >= 2;
    }

    /**
     * Tell whether a line contains a run of at least ten digits.
     *
     * Both ASCII and Bangla digits count, and whitespace between digits is
     * tolerated.
     *
     * @param string $line Line to inspect.
     *
     * @return bool
     */
    private function lineHasVoterNo(string $line): bool
    {
        $found = preg_match('/(?:[০-৯0-9]\s*){10,}/u', $line);

        return $found === 1;
    }

    /**
     * Find the profession line and the date-of-birth string of a block.
     *
     * First pass: the first line that starts with the profession label. Its
     * value is the rest of that line or, when empty, the next non-empty line
     * unless that line starts with a label. A date of birth embedded in the
     * value is captured as well.
     *
     * Second pass (only when no profession line was found): the first line
     * containing the date-of-birth label; the text to the left of the label is
     * used as the profession line unless it is empty or starts with a label.
     *
     * @param string $block Text of a single voter entry.
     *
     * @return array ['line' => string|null, 'dob' => string|null]
     */
    private function extractProfessionAndDob(string $block): array
    {
        $lines = preg_split('/\n+/', $block);
        $professionLine = null;
        $dobString = null;

        foreach ($lines as $index => $line) {
            $line = trim($line);
            if ($line === '') {
                continue;
            }

            if (preg_match('/^পেশা\s*[:：ঃ]?\s*(.*)$/u', $line, $match)) {
                $value = trim($match[1] ?? '');
                if ($value === '') {
                    // Label alone on its line: look at the following line instead.
                    $value = $this->nextNonEmptyLine($lines, $index + 1);
                    if ($value && $this->lineStartsWithLabel($value)) {
                        $value = '';
                    }
                }

                $professionLine = $value !== '' ? $value : null;
                if ($professionLine && preg_match('/জন(?:্ম)?\s*তারিখ[:：ঃ\\s]*([০-৯0-9\/\-]+)/u', $professionLine, $mDob)) {
                    $dobString = $mDob[1] ?? null;
                }
                // Only the first profession label is considered.
                break;
            }
        }

        if (! $professionLine) {
            foreach ($lines as $line) {
                $line = trim($line);
                if ($line === '') {
                    continue;
                }

                if (preg_match('/জন(?:্ম)?\s*তারিখ[:：ঃ\\s]*([০-৯0-9\/\-]+)/u', $line, $mDob)) {
                    $dobString = $dobString ?: ($mDob[1] ?? null);
                    // Text before the date-of-birth label may be the profession.
                    $left = $this->trimBanglaPunctuation(preg_split('/জন(?:্ম)?\s*তারিখ[:：ঃ\\s]*/u', $line)[0] ?? '');
                    if ($left !== '' && ! $this->lineStartsWithLabel($left)) {
                        $professionLine = $left;
                    }
                    break;
                }
            }
        }

        if ($professionLine) {
            $professionLine = preg_replace('/^পেশা\s*[:：ঃ]?\s*/u', '', $professionLine);
            // An address line is never a profession.
            if (preg_match('/^ঠিকানা\s*[:：ঃ]?/u', $professionLine)) {
                $professionLine = null;
            }
        }

        return ['line' => $professionLine, 'dob' => $dobString];
    }

    /**
     * Work out which attributes of an existing voter should be overwritten.
     *
     * Serial fields are decided by shouldReplaceSerial(). All other fields are
     * either filled only when currently blank ($mergeOnlyMissing) or decided by
     * shouldReplaceValue().
     *
     * @param Voter $existing         Stored voter.
     * @param array $parsed           Freshly parsed attributes.
     * @param bool  $mergeOnlyMissing When true, never overwrite a non-blank value.
     *
     * @return array Attribute => new value, for the fields that should change.
     */
    private function buildMergeData(Voter $existing, array $parsed, bool $mergeOnlyMissing): array
    {
        $changes = [];

        // Serial fields; the flag says whether the field holds Bangla digits.
        $serialFields = [
            'serial_no' => false,
            'serial_no_pdf_bn' => true,
        ];
        foreach ($serialFields as $attribute => $isBangla) {
            $stored = $existing->{$attribute};
            $incoming = $parsed[$attribute] ?? null;

            if ($this->shouldReplaceSerial($stored, $incoming, $mergeOnlyMissing, $isBangla)) {
                $changes[$attribute] = $incoming;
            }
        }

        $valueFields = [
            'name_bn',
            'father_name_bn',
            'mother_name_bn',
            'profession_bn',
            'date_of_birth',
            'date_of_birth_bn',
            'address_bn',
        ];
        foreach ($valueFields as $attribute) {
            $stored = $existing->{$attribute};
            $incoming = $parsed[$attribute] ?? null;

            $accept = $mergeOnlyMissing
                ? $this->isBlankValue($stored, $attribute) && ! $this->isBlankValue($incoming, $attribute)
                : $this->shouldReplaceValue($stored, $incoming, $attribute);

            if ($accept) {
                $changes[$attribute] = $incoming;
            }
        }

        return $changes;
    }

    /**
     * Decide whether a stored serial number should be replaced by a candidate.
     *
     * The candidate must be non-empty and well formed: 1-4 Bangla digits for
     * the PDF serial, ASCII digits otherwise. In merge-only-missing mode the
     * stored value is replaced only when it is null, '' or 0; otherwise it is
     * replaced whenever its string form differs from the candidate.
     *
     * @param mixed $current          Stored value.
     * @param mixed $candidate        Newly parsed value.
     * @param bool  $mergeOnlyMissing Only fill a missing stored value.
     * @param bool  $isPdfBn          True for the Bangla-digit PDF serial field.
     *
     * @return bool
     */
    private function shouldReplaceSerial($current, $candidate, bool $mergeOnlyMissing, bool $isPdfBn): bool
    {
        if ($candidate === null || $candidate === '') {
            return false;
        }

        $text = (string) $candidate;
        $wellFormed = $isPdfBn
            ? preg_match('/^[০-৯]{1,4}$/u', $text)
            : preg_match('/^[0-9]+$/', $text);

        if (! $wellFormed) {
            return false;
        }

        if (! $mergeOnlyMissing) {
            return (string) $current !== $text;
        }

        return in_array($current, [null, '', 0], true);
    }

    /**
     * Look up the stored voter that corresponds to a parsed record.
     *
     * Lookup order:
     *   1. Same source file and same Bangla PDF serial.
     *   2. Same area and same voter number — but only when that voter is not
     *      already attached to a different source file.
     *
     * @param SourceFile $sourceFile File currently being imported.
     * @param array      $parsed     Parsed attributes (serial_no_pdf_bn, voter_no).
     *
     * @return Voter|null The matching voter, or null when a new row should be created.
     */
    private function findExistingVoter(SourceFile $sourceFile, array $parsed): ?Voter
    {
        $serialNoBn = $parsed['serial_no_pdf_bn'] ?? null;
        $serialNoBn = $serialNoBn ? $this->normalizeBnDigits($serialNoBn) : null;
        if ($serialNoBn) {
            $existing = Voter::where('serial_no_pdf_bn', $serialNoBn)
                ->where('source_file_id', $sourceFile->id)
                ->first();
            if ($existing) {
                return $existing;
            }
        }

        $voterNo = $parsed['voter_no'] ?? null;
        if (! $voterNo) {
            return null;
        }

        $existing = Voter::where('voter_no', $voterNo)
            ->where('area_id', $sourceFile->area_id)
            ->first();
        if (! $existing) {
            return null;
        }

        // Compare the ids by value, not by PHP type: a foreign key that is not
        // cast may be a string while the primary key is an int, and a strict
        // comparison would then treat a voter of this very file as foreign.
        $belongsToOtherFile = $existing->source_file_id
            && (string) $existing->source_file_id !== (string) $sourceFile->id;

        return $belongsToOtherFile ? null : $existing;
    }

    /**
     * Delete voters of this file that share a serial but lack a valid voter number.
     *
     * Nothing happens unless a serial is given and the accompanying voter
     * number is itself valid.
     *
     * @param SourceFile  $sourceFile   File whose voters are inspected.
     * @param string|null $serialNoBn   Bangla PDF serial to match.
     * @param string|null $validVoterNo Voter number of the record just imported.
     */
    private function cleanupInvalidSerialDuplicates(SourceFile $sourceFile, ?string $serialNoBn, ?string $validVoterNo): void
    {
        if (! $serialNoBn) {
            return;
        }
        if (! $this->isValidVoterNo($validVoterNo)) {
            return;
        }

        $sameSerial = Voter::where('source_file_id', $sourceFile->id)
            ->where('serial_no_pdf_bn', $serialNoBn)
            ->get();

        foreach ($sameSerial as $voter) {
            if ($this->isValidVoterNo($voter->voter_no)) {
                continue;
            }

            $voter->delete();
        }
    }

    /**
     * Decide whether the stored voter number should be replaced.
     *
     * Replacement happens only when the candidate is valid and the stored
     * number is not.
     *
     * @param string|null $current   Stored voter number.
     * @param string|null $candidate Newly parsed voter number.
     *
     * @return bool
     */
    private function shouldUpdateVoterNo(?string $current, ?string $candidate): bool
    {
        return $this->isValidVoterNo($candidate)
            && ! $this->isValidVoterNo($current);
    }

    /**
     * Check whether a value looks like a voter number.
     *
     * The value is passed through normalizeBnDigits(), everything except the
     * Bengali digits ০-৯ is removed, and the result must be 10 to 17 digits long.
     * NOTE(review): presumably normalizeBnDigits() converts ASCII digits to
     * Bengali digits; confirm against its definition.
     */
    private function isValidVoterNo(?string $value): bool
    {
        if (! $value) {
            return false;
        }

        $bnDigitsOnly = preg_replace('/[^০-৯]/u', '', $this->normalizeBnDigits($value));
        $digitCount = mb_strlen($bnDigitsOnly);

        return ! ($digitCount < 10 || $digitCount > 17);
    }

    /**
     * Count which optional name/profession fields are blank in a parsed record.
     *
     * For each of father_name_bn, mother_name_bn and profession_bn that
     * isBlankValue() reports as blank, the matching counter under
     * $summary['missing_fields'] is incremented.
     *
     * A counter (or the whole 'missing_fields' entry) that is not present yet
     * is treated as 0 instead of triggering an "undefined array key" warning.
     *
     * @param  array  $summary  running import summary
     * @param  array  $parsed  parsed voter fields
     * @return array  the updated summary
     */
    private function trackMissingFields(array $summary, array $parsed): array
    {
        foreach (['father_name_bn', 'mother_name_bn', 'profession_bn'] as $field) {
            if ($this->isBlankValue($parsed[$field] ?? null, $field)) {
                $summary['missing_fields'][$field] = ($summary['missing_fields'][$field] ?? 0) + 1;
            }
        }

        return $summary;
    }

    /**
     * Tell whether a parsed field value carries no usable content.
     *
     * A value is blank when it is null, empty after trim(), or consists only
     * of colon-like characters (ASCII ':', full-width '：', Bengali visarga 'ঃ').
     * For the 'profession_bn' field, a value that starts with the address or
     * voter-number label followed by a colon is also treated as blank, since it
     * is a neighbouring label rather than a profession.
     *
     * @param  string|null  $value  raw field value
     * @param  string  $field  field name; only 'profession_bn' changes behaviour
     */
    private function isBlankValue(?string $value, string $field = ''): bool
    {
        if ($value === null) {
            return true;
        }

        $trimmed = trim($value);
        if ($trimmed === '') {
            return true;
        }

        // Same colon set as the label check below, so a lone full-width colon is blank too.
        if (preg_match('/^[:：ঃ]+$/u', $trimmed)) {
            return true;
        }

        if ($field === 'profession_bn' && preg_match('/^(ঠিকানা|ভোটার\s*নং)[:：ঃ]/u', $trimmed)) {
            return true;
        }

        return false;
    }

    /**
     * Decide whether a newly parsed value should overwrite the stored one.
     *
     * Rules, in order:
     *  - a blank candidate never replaces anything;
     *  - any non-blank candidate replaces a blank current value;
     *  - identical values are left alone;
     *  - a clean candidate replaces a suspicious-looking current value;
     *  - otherwise the candidate must beat the current quality score by at least 2.
     *
     * @param  string|null  $current  value currently stored
     * @param  string|null  $candidate  value produced by the latest parse
     * @param  string  $field  field name, forwarded to isBlankValue()
     */
    private function shouldReplaceValue(?string $current, ?string $candidate, string $field): bool
    {
        if ($this->isBlankValue($candidate, $field)) {
            return false;
        }
        if ($this->isBlankValue($current, $field)) {
            return true;
        }
        if ($current === $candidate) {
            return false;
        }

        if ($this->looksSuspicious($current) && ! $this->looksSuspicious($candidate)) {
            return true;
        }

        $scoreGain = $this->valueQualityScore($candidate) - $this->valueQualityScore($current);

        return $scoreGain >= 2;
    }

    /**
     * Report whether a value shows signs of a broken extraction: it either
     * contains the replacement character or has at least one token flagged by
     * countSuspiciousTokens().
     */
    private function looksSuspicious(string $value): bool
    {
        return preg_match('/�/', $value) === 1
            || $this->countSuspiciousTokens($value) > 0;
    }

    /**
     * Heuristic quality score for a text value.
     *
     * Starts from the character length, then subtracts 5 per replacement
     * character and 3 per suspicious token. Higher is better.
     */
    private function valueQualityScore(string $value): int
    {
        $replacementPenalty = preg_match_all('/�/', $value) * 5;
        $tokenPenalty = $this->countSuspiciousTokens($value) * 3;

        return mb_strlen($value) - $replacementPenalty - $tokenPenalty;
    }

    /**
     * Count whitespace-separated tokens that look like extraction debris.
     *
     * A token is counted (once) when it
     *  - consists solely of non-spacing marks, punctuation or symbols, or
     *  - is a single character from the listed Bengali vowel signs, or
     *  - is exactly one independent vowel (অ-ঔ) followed by one such vowel sign.
     */
    private function countSuspiciousTokens(string $value): int
    {
        $suspicious = 0;

        foreach (preg_split('/\s+/u', trim($value)) as $piece) {
            if ($piece === '') {
                continue;
            }

            $isDebris = preg_match('/^[\\p{Mn}\\p{P}\\p{S}]+$/u', $piece)
                || (mb_strlen($piece) === 1 && preg_match('/[ািীুূেোৃ]/u', $piece))
                || preg_match('/^[অ-ঔ][ািীুূেোৃ]$/u', $piece);

            if ($isDebris) {
                $suspicious++;
            }
        }

        return $suspicious;
    }

    /**
     * Decide whether a fallback OCR pass is warranted for a summary: true when
     * any of the tracked fields has a positive missing count or at least one
     * block failed to parse.
     */
    private function shouldRunFallbackOcr(array $summary): bool
    {
        $missingCounts = $summary['missing_fields'] ?? [];

        foreach (['father_name_bn', 'mother_name_bn', 'profession_bn'] as $field) {
            if (($missingCounts[$field] ?? 0) > 0) {
                return true;
            }
        }

        return (int) ($summary['number_of_failed_blocks'] ?? 0) > 0;
    }

    /**
     * Decide whether alternative OCR grids should be tried for a page.
     *
     * True when VM_OCR_GRID_ALWAYS_FALLBACK is enabled, when no voter was
     * parsed at all, or when fewer voters were parsed than
     * VM_OCR_MIN_VOTERS_PER_PAGE (default 10).
     */
    private function shouldRunGridFallback(array $summary): bool
    {
        $forced = $this->envBool('VM_OCR_GRID_ALWAYS_FALLBACK', false);
        if ($forced) {
            return true;
        }

        $threshold = (int) $this->env('VM_OCR_MIN_VOTERS_PER_PAGE', 10);
        $parsedCount = (int) ($summary['number_of_voters_parsed'] ?? 0);

        return $parsedCount === 0 || $parsedCount < $threshold;
    }

    /**
     * Build the ordered list of fallback page-segmentation modes.
     *
     * Reads the comma-separated VM_OCR_PSM_FALLBACK setting, trims each entry,
     * drops falsy entries (empty strings and '0'), drops the base mode
     * configured in VM_OCR_PSM, and removes duplicates keeping first occurrence.
     *
     * @return array  list of PSM values as strings
     */
    private function getFallbackPsms(): array
    {
        $configured = $this->env('VM_OCR_PSM_FALLBACK', '6,11,4,12,3,7,13');
        $basePsm = (string) $this->env('VM_OCR_PSM', '6');

        $modes = [];
        foreach (explode(',', (string) $configured) as $rawEntry) {
            $mode = trim($rawEntry);
            if (! $mode || $mode === $basePsm) {
                continue;
            }
            if (! in_array($mode, $modes, true)) {
                $modes[] = $mode;
            }
        }

        return $modes;
    }

    /**
     * Build the list of alternative OCR grids to try.
     *
     * Parses the comma-separated VM_OCR_GRID_FALLBACK setting ("COLSxROWS"
     * entries), skips entries that do not parse, skips the grid equal to the
     * default VM_OCR_GRID_COLS x VM_OCR_GRID_ROWS, and removes duplicates.
     *
     * @return array  list of [cols, rows] pairs
     */
    private function getFallbackGrids(): array
    {
        $specs = explode(',', (string) $this->env('VM_OCR_GRID_FALLBACK', '3x6,3x5'));

        $baseCols = max(1, (int) $this->env('VM_OCR_GRID_COLS', 3));
        $baseRows = max(1, (int) $this->env('VM_OCR_GRID_ROWS', 5));
        $skipKey = $baseCols.'x'.$baseRows;

        $unique = [];
        foreach ($specs as $spec) {
            $spec = trim($spec);
            if (! $spec) {
                continue;
            }

            $dims = $this->parseGridSpec($spec);
            if ($dims === null) {
                continue;
            }

            $key = $dims[0].'x'.$dims[1];
            if ($key !== $skipKey) {
                // Keyed by "colsxrows" so repeated entries collapse into one.
                $unique[$key] = $dims;
            }
        }

        return array_values($unique);
    }

    /**
     * Parse a "COLSxROWS" grid specification (case-insensitive 'x', optional
     * surrounding whitespace).
     *
     * @return array|null  [cols, rows], each clamped to at least 1, or null when the spec is malformed
     */
    private function parseGridSpec(string $spec): ?array
    {
        $matched = preg_match('/^\s*(\d+)\s*x\s*(\d+)\s*$/i', $spec, $m);

        return $matched
            ? [max(1, (int) $m[1]), max(1, (int) $m[2])]
            : null;
    }

    /**
     * Fold a fallback pass summary into the base summary.
     *
     * Parsed-voter and failed-block counters are added, failed block lists are
     * concatenated, and for each field already tracked in the base summary the
     * missing count becomes the smaller of the two passes.
     *
     * @param  array  $base  summary of the primary pass
     * @param  array  $fallback  summary of the fallback pass
     * @return array  the merged summary
     */
    private function mergeSummaries(array $base, array $fallback): array
    {
        foreach (['number_of_voters_parsed', 'number_of_failed_blocks'] as $counter) {
            $base[$counter] += $fallback[$counter] ?? 0;
        }

        $extraFailures = $fallback['failed_blocks'] ?? [];
        if (! empty($extraFailures)) {
            $base['failed_blocks'] = array_merge($base['failed_blocks'] ?? [], $extraFailures);
        }

        if (isset($base['missing_fields'], $fallback['missing_fields'])) {
            $merged = [];
            foreach ($base['missing_fields'] as $field => $count) {
                // Fields the fallback did not track keep the base count.
                $merged[$field] = min($count, $fallback['missing_fields'][$field] ?? $count);
            }
            $base['missing_fields'] = $merged;
        }

        return $base;
    }

    /**
     * Quick check that a text layer plausibly contains voter records.
     *
     * Non-blank text qualifies when it contains both a voter-number label
     * (including one misspelled variant) and a name label followed by a colon,
     * or, failing that, a run of six or more Bengali digits.
     */
    private function looksLikeVoterText(string $text): bool
    {
        if (trim($text) === '') {
            return false;
        }

        $voterLabelFound = preg_match('/ভোটার\s*নং/u', $text) || preg_match('/ভাটার\s*নং/u', $text);
        if ($voterLabelFound) {
            $nameLabelFound = preg_match('/নাম[:：ঃ]/u', $text);
            if ($nameLabelFound) {
                return true;
            }
        }

        return preg_match('/[০-৯]{6,}/u', $text) === 1;
    }

    /**
     * Return the first line at or after $startIndex that is non-empty after
     * trimming, or null when there is none.
     *
     * @param  array  $lines  list of text lines
     * @param  int  $startIndex  index to start scanning from
     * @return string|null  the trimmed line
     */
    private function nextNonEmptyLine(array $lines, int $startIndex): ?string
    {
        $total = count($lines);
        $cursor = $startIndex;

        while ($cursor < $total) {
            $candidate = trim($lines[$cursor] ?? '');
            if ($candidate !== '') {
                return $candidate;
            }
            $cursor++;
        }

        return null;
    }

    /**
     * Tell whether a trimmed line begins with one of the known field labels
     * (name, voter number, father, mother, address, date of birth,
     * profession), optionally followed by a colon-like character.
     */
    private function lineStartsWithLabel(string $line): bool
    {
        $labelPattern = '/^(নাম|ভোটার\s*নং|পিতা|মাতা|ঠিকানা|জন্ম\s*তারিখ|পেশা)\s*[:：ঃ]?/u';

        return preg_match($labelPattern, trim($line)) === 1;
    }

    /**
     * Strip leading and trailing whitespace, commas, ASCII colons and Bengali
     * visarga characters from a value. Returns an empty string if the regex
     * engine fails.
     */
    private function trimBanglaPunctuation(string $value): string
    {
        return preg_replace('/^[\s,ঃ:]+|[\s,ঃ:]+$/u', '', $value) ?? '';
    }

    /**
     * Repair common extraction defects in a raw voter block so that the field
     * labels can be recognised afterwards.
     *
     * Steps, in order (later steps act on the output of earlier ones, so the
     * order must not be changed casually):
     *  1. strip invalid UTF-8;
     *  2. literal replacements for mis-ordered or missing vowel signs in the
     *     voter, father, profession and address labels;
     *  3. regex repairs for the father label at line start and for the
     *     date-of-birth label;
     *  4. further repairs of voter-label variants;
     *  5. whitespace before a father/mother/profession/address/voter-number
     *     label that is followed by a colon-like character becomes a newline.
     *
     * @param  string  $block  raw text of one voter block
     * @return string  the normalized text
     */
    private function normalizeBlock(string $block): string
    {
        // Strip invalid UTF-8 first; the preg_replace calls below use the /u modifier.
        $block = $this->removeInvalidUtf8($block);
        // str_replace applies the pairs left to right on the running result.
        $block = str_replace(
            ['োভাটার', 'োভাটার', 'ভাটা', 'ভাটার', 'িপতা', 'িপেশা', 'িঠকানা', 'োপশা', 'িপতা', 'পশা'],
            ['ভোটার', 'ভোটার', 'ভোটা', 'ভোটার', 'পিতা', 'পেশা', 'ঠিকানা', 'পেশা', 'পিতা', 'পেশা'],
            $block
        );

        // A line starting with this token followed by whitespace is rewritten to the father label.
        $block = preg_replace('/(^|\\n)দত\\s+/u', '$1পিতা ', $block);
        // Date-of-birth label variants.
        $block = preg_replace('/জ\\s*তািরখ/u', 'জন্ম তারিখ', $block);
        $block = preg_replace('/জ\\s*তা\\s*রিখ/u', 'জন্ম তারিখ', $block);
        $block = preg_replace('/জন্\\s*তারিখ/u', 'জন্ম তারিখ', $block);
        // The next replacement also hits already-correct text; the one after it undoes the doubling.
        $block = preg_replace('/জন্/u', 'জন্ম', $block);
        $block = preg_replace('/জন্মম/u', 'জন্ম', $block);
        $block = preg_replace('/তারখ/u', 'তারিখ', $block);
        // Voter-label variants with a wrong middle consonant.
        $block = preg_replace('/ভা[ঢটড়]ার/u', 'ভোটার', $block);
        $block = str_replace(['ভাঢার', 'ভাটার', 'ভাড়ার', 'ভাঁড়ার'], 'ভোটার', $block);
        // Put each "label:" on its own line (lookahead keeps the label itself intact).
        $block = preg_replace('/\s+(?=(পিতা|মাতা|পেশা|ঠিকানা|ভোটার\s*নং)\s*[:：ঃ])/u', "\n", $block);


        return $block;
    }

    /**
     * Remove byte sequences that are not valid UTF-8.
     *
     * Prefers mbstring: the substitute character is temporarily set to 'none'
     * so invalid bytes are dropped during a UTF-8 to UTF-8 conversion, then the
     * previous setting is restored. Falls back to iconv with //IGNORE. When
     * neither is available, or the conversion fails, the input is returned
     * unchanged.
     */
    private function removeInvalidUtf8(string $value): string
    {
        if ($value === '') {
            return $value;
        }

        if (function_exists('mb_convert_encoding')) {
            $savedSubstitute = mb_substitute_character();
            mb_substitute_character('none');
            $converted = mb_convert_encoding($value, 'UTF-8', 'UTF-8');
            mb_substitute_character($savedSubstitute);

            return ($converted === false || $converted === null) ? $value : $converted;
        }

        if (function_exists('iconv')) {
            $converted = @iconv('UTF-8', 'UTF-8//IGNORE', $value);

            return $converted === false ? $value : $converted;
        }

        return $value;
    }

    /**
     * Clean a value captured for a labelled field.
     *
     * Collapses whitespace, then, if the label (followed by a colon-like
     * character) occurs inside the value, keeps only the text after its last
     * occurrence. Returns null for falsy input, for an empty result, or for a
     * result made only of ':' / 'ঃ' characters.
     *
     * @param  string|null  $value  captured text
     * @param  string  $label  label text to cut away
     */
    private function sanitizeLabelValue(?string $value, string $label): ?string
    {
        if (! $value) {
            return null;
        }

        $collapsed = preg_replace('/\s+/u', ' ', trim($value));
        $segments = preg_split('/\s*'.preg_quote($label, '/').'[:：ঃ]\s*/u', $collapsed);
        $tail = trim($segments[count($segments) - 1] ?? $collapsed);

        if ($tail === '') {
            return null;
        }
        if (preg_match('/^[:ঃ]+$/u', $tail)) {
            return null;
        }

        return $tail;
    }

    /**
     * Normalize a Bengali field value and repair known extraction/OCR defects.
     *
     * Returns null for falsy input (note that the string '0' is falsy too).
     * Otherwise the value is trimmed, underscores become spaces, whitespace is
     * collapsed, the text is NFC-normalized when the Normalizer class exists,
     * and a long ordered series of literal and regex replacements is applied.
     *
     * The replacements run strictly top to bottom and each one sees the output
     * of all earlier ones, so reordering or removing entries can change
     * results in non-obvious ways.
     *
     * @param  string|null  $value  raw field value
     * @return string|null  the cleaned value
     */
    private function cleanBangla(?string $value): ?string
    {
        if (! $value) {
            return null;
        }

        $value = trim($value);
        $value = str_replace('_', ' ', $value);
        $value = preg_replace('/\s+/u', ' ', $value);
        if (class_exists(Normalizer::class)) {
            $value = Normalizer::normalize($value, Normalizer::FORM_C);
        }

        // Collapse split or doubled vowel-sign sequences.
        $value = str_replace(['াে', 'ো'], 'ো', $value);
        $value = str_replace(['াৈ', 'ৈা'], 'ৈ', $value);
        $value = str_replace(['োা', 'াো'], 'ো', $value);
        $value = str_replace(['মোাঃ', 'মোাসাঃ'], ['মোঃ', 'মোসাঃ'], $value);
        // Dictionary of known broken spellings (names, professions, places) and their corrections.
        $value = str_replace(
            ['বোগম', 'খােলক', 'খােলেক', 'খোলক', 'হোোসন', 'হোোসেন', 'আহামদ', 'বোসরকারী', 'বিসরকারী', 'বযবসা', 'িফেরাজা', 'রিবনা', 'মোাহন', 'মোাসাঃ', 'মোাসা:', 'মোাসা ', 'মোাস ', 'নোায়াগাও', 'নোায়াগঁাও', 'নোায়াগা', 'নোায়া', 'গাজীপুর', 'ফোতমা', 'মোাহামদ', 'হোফজ', 'মোাসামৎ', 'মোাসা', 'িততারকল', 'িততার কল', 'তিততারকল', 'তিততার কল', 'আেবদীন', 'উিদন', 'ুিদন', 'চন দাস', 'চন ঘোষ', 'চন পাল', 'চন দে', 'চন বর্মণ', 'চন বর্মন', 'গৃিহনী'],
            ['বেগম', 'খালেক', 'খালেক', 'খালেক', 'হোসেন', 'হোসেন', 'আহমদ', 'বেসরকারি', 'বেসরকারি', 'ব্যবসা', 'ফিরোজা', 'রেবিনা', 'মোহন', 'মোসাঃ', 'মোসাঃ ', 'মোসাঃ ', 'মোসা ', 'নোয়াগাঁও', 'নোয়াগাঁও', 'নোয়াগাঁও', 'নোয়াগাঁও', 'গাজীপুর', 'ফাতেমা', 'মোহাম্মদ', 'হাফেজ', 'মোসামত', 'মোসা', 'তিতারকুল', 'তিতারকুল', 'তিতারকুল', 'তিতারকুল', 'আবেদীন', 'উদ্দিন', 'উদ্দিন', 'চন্দ্র দাস', 'চন্দ্র ঘোষ', 'চন্দ্র পাল', 'চন্দ্র দে', 'চন্দ্র বর্মণ', 'চন্দ্র বর্মণ', 'গৃহিনী'],
            $value
        );

        // Honorific prefix that lost its leading consonant cluster (anchored at the start of the value).
        $value = preg_replace('/^ী\\s*মিত/u', 'শ্রী মতি', $value);
        $value = preg_replace('/^ী\\s*মতি/u', 'শ্রী মতি', $value);
        $value = str_replace('রিবদাস', 'রবিদাস', $value);

        // Move a vowel sign that precedes its consonant to after it.
        $value = $this->reorderLeadingVowel($value);

        // Regex fallback fixes: tolerate one arbitrary character where a vowel sign was lost or garbled.
        $value = preg_replace('/ব.?গম/u', 'বেগম', $value);
        $value = preg_replace('/খ.?লক/u', 'খালেক', $value);
        $value = preg_replace('/হো.?সন/u', 'হোসেন', $value);
        $value = preg_replace('/মো.?ঃ/u', 'মোঃ', $value);
        $value = preg_replace('/মো.?সাঃ/u', 'মোসাঃ', $value);
        $value = preg_replace('/ব.?সরকারি/u', 'বেসরকারি', $value);
        $value = preg_replace('/ব.?সরকারী/u', 'বেসরকারি', $value);
        $value = preg_replace('/নো.?য়াগ.?ঁ?া?ও/u', 'নোয়াগাঁও', $value);
        $value = preg_replace('/নো.?য়া.?গ.?া?ও?/u', 'নোয়াগাঁও', $value);
        // Individual name/word corrections; many fix a vowel sign placed before its consonant.
        // NOTE(review): the first pair below looks identical on screen; presumably the two
        // strings differ only in code-point composition. Confirm before touching.
        $value = str_replace('গিয়াস', 'গিয়াস', $value);
        $value = str_replace('িগয়াস', 'গিয়াস', $value);
        $value = str_replace('মাসদ ', 'মাসুদ ', $value);
        $value = str_replace('িবমল', 'বিমল', $value);
        $value = str_replace('িনরঞন', 'নিরঞ্জন', $value);
        $value = str_replace('িনমরল', 'নির্মল', $value);
        $value = str_replace('িনতাই', 'নিতাই', $value);
        $value = str_replace('যোোগন', 'যোগেন', $value);
        $value = str_replace('কৃঞ ', 'কৃষ্ণ ', $value);
        $value = str_replace(['গোৌরাঙ', 'গোৌরাং'], 'গৌরাঙ্গ', $value);
        $value = str_replace('গোৌতম', 'গৌতম', $value);
        $value = str_replace('িলটন', 'লিটন', $value);
        $value = str_replace('হািনফ', 'হানিফ', $value);
        $value = str_replace('িসরাজ', 'সিরাজ', $value);
        $value = str_replace('ছিদকর', 'ছিদ্দিকুর', $value);
        $value = str_replace('িছিদক', 'ছিদ্দিক', $value);
        $value = str_replace('বিল্লাল', 'বিলাল', $value);
        $value = str_replace('হািববুর', 'হাবিবুর', $value);
        $value = str_replace('ছািনস', 'ছানিস', $value);
        $value = str_replace('মিজবুল', 'মজিবুল', $value);
        $value = str_replace(['রোোকয়', 'রোোকয়া'], 'রোকেয়া', $value);
        $value = str_replace(['তোাফাজল', 'মোাফাজল'], ['তোফাজল', 'মোফাজল'], $value);
        $value = str_replace('আেয়শা', 'আয়েশা', $value);
        $value = str_replace('িবলাল', 'বিলাল', $value);
        $value = str_replace('শিমক', 'শ্রমিক', $value);
        $value = str_replace('িময়া', 'মিয়া', $value);
        $value = str_replace('আিমর', 'আমির', $value);
        $value = str_replace('জোোবদা', 'জোবেদা', $value);
        $value = str_replace('জোোবদালী', 'জোবেদ আলী', $value);
        $value = str_replace('মিজদ', 'মজিদ', $value);
        $value = str_replace('নোয়ঁা', 'নোয়া', $value);
        $value = str_replace('নোয়ঁাগাও', 'নোয়াগাঁও', $value);
        $value = str_replace(['নয়াগঁাও', 'নয়াগঁাও'], 'নোয়াগাঁও', $value);
        $value = str_replace('রয়াজ', 'রিয়াজ', $value);
        $value = str_replace('িরিয়াজ', 'রিয়াজ', $value);
        $value = str_replace('িরিয়াজুল', 'রিয়াজুল', $value);
        $value = str_replace('িরিয়াজিদন', 'রিয়াজুদ্দিন', $value);
        $value = str_replace(' ির', ' রি', $value);
        $value = str_replace('চঁা', 'চাঁ', $value);
        $value = str_replace('পূ্্ন', 'পূর্ণ', $value);
        $value = str_replace('লকী', 'লক্ষী', $value);
        $value = str_replace('রপন', 'রূপন', $value);
        $value = str_replace('পভাত', 'প্রভাত', $value);
        $value = str_replace('পিবত', 'প্রবতী', $value);
        $value = str_replace('তিততাকল', 'তিতারকুল', $value);
        $value = str_replace('িততাকল', 'তিতারকুল', $value);
        $value = str_replace('ভবীন', 'ভবানী', $value);
        $value = str_replace('প্রবত ', 'প্রবতী ', $value);
        $value = preg_replace('/োো/u', 'ো', $value);
        // Unify the spelling variants of the common name prefixes.
        $value = str_replace(['মোছা:', 'মোছাঃ', 'মোসা:', 'মোসাা:', 'মোসাঃ', 'মাছা:', 'মাছাঃ', 'মাসা:', 'মাসাঃ'], 'মোসাঃ', $value);
        $value = str_replace(['মো:', 'মোঃ', 'আঃ', 'আ:'], ['মোঃ', 'মোঃ', 'আঃ', 'আঃ'], $value);
        $value = str_replace('তািরখ', 'তারিখ', $value);
        // Swap an aa-sign + i-sign pair that precedes a consonant so the i-sign follows the consonant.
        $value = preg_replace('/াি([ক-হড়ঢ়য])/u', 'া$1ি', $value);
        // Remove a date-of-birth fragment (label plus digits, slashes, dashes) that leaked into the value.
        $value = preg_replace('/,?জন তািরখ[:：]?[০-৯\\/\\-]+/u', '', $value);
        $value = preg_replace('/,?জন তারিখ[:：]?[০-৯\\/\\-]+/u', '', $value);
        $value = str_replace(['রোৌ', 'রোো'], ['রৌ', 'রো'], $value);
        // Collapse repeated i-signs and drop stray ones standing alone, at the start, or at the end.
        $value = preg_replace('/িি+/u', 'ি', $value);
        $value = preg_replace('/(?<=\\s)ি(?=\\s|$)/u', '', $value);
        $value = preg_replace('/^ি\\s*/u', '', $value);
        $value = preg_replace('/\\s+ি$/u', '', $value);
        $value = preg_replace('/সখ\s/iu', 'সুখ ', $value);
        $value = preg_replace('/মো.?হামদ/u', 'মোহাম্মদ', $value);
        $value = preg_replace('/মাহমুদলা/u', 'মাহমুদুল্লা', $value);
        $value = str_replace('চাকরী', 'চাকরি', $value);
        $value = str_replace('সরকারী', 'সরকারি', $value);
        $value = str_replace('গ্ৃহিনী', 'গৃহিনী', $value);
        $value = preg_replace('/মো.?হ/u', 'মোহ', $value);
        $value = str_replace(['িরনা', 'তিপরন'], ['রিনা', 'তিপরন'], $value);
        $value = preg_replace('/মো.?াসঃ/u', 'মোসাঃ', $value);
        $value = preg_replace('/মো.?াস:?/u', 'মোসা', $value);
        $value = str_replace('গাজীপুরর', 'গাজীপুর', $value);

        // Extra Bangla fixes for common broken patterns in addresses.
        // NOTE(review): '\u00a0' inside single quotes and "\u0009" inside double quotes are not
        // PHP escape sequences (PHP's Unicode escape is "\u{...}"), so those two entries match the
        // literal backslash text rather than a no-break space or a tab.
        $value = str_replace(
            [
                'গাজীপুরর', 'গাজীপুরর ', 'গাজীপুরর,', 'গাজীপুরর\u00a0', "গাজীপুরর\t", "গাজীপুরর\u0009",
                'গাজীপুরর সদর', 'গাজীপুরর সদর,', 'গাজীপুরর সদর ',
                'নোয়াগাঁওঁও', 'নোয়াগাঁওঁ', 'নোয়াগাঁওঁও,', 'নোয়াগাঁওঁও ', 'নোয়াগাঁওঁ ', 'নোয়াগাও',
                'নয়ানগাঁও', 'নয়ানগাঁওঁও', 'নয়ানগাঁওঁ', 'নয়ানগাঁওঁও,', 'নয়ানগাঁওঁও ', 'নয়ানগাঁওঁ ',
                'তিতার কল', 'তিতারকল',
            ],
            [
                'গাজীপুর', 'গাজীপুর ', 'গাজীপুর,', 'গাজীপুর ', 'গাজীপুর ', 'গাজীপুর ',
                'গাজীপুর সদর', 'গাজীপুর সদর,', 'গাজীপুর সদর ',
                'নোয়াগাঁও', 'নোয়াগাঁও', 'নোয়াগাঁও,', 'নোয়াগাঁও ', 'নোয়াগাঁও ', 'নোয়াগাঁও',
                'নোয়াগাঁও', 'নোয়াগাঁও', 'নোয়াগাঁও', 'নোয়াগাঁও,', 'নোয়াগাঁও ', 'নোয়াগাঁও ',
                'তিতারকুল', 'তিতারকুল',
            ],
            $value
        );

        // Collapse doubled nasal+vowel patterns that sometimes leak from OCR
        $value = str_replace(['ঁওঁও', 'ঁওঁ'], 'ঁও', $value);

        return $value;
    }

    /**
     * Remove stray Latin words from a value that is otherwise Bengali.
     *
     * Values without any character in the Bengali block (U+0980-U+09FF) are
     * returned untouched. Otherwise every standalone run of two or more ASCII
     * letters is removed, repeated whitespace is collapsed and the result is
     * trimmed. Returns null for falsy input or when nothing remains.
     */
    private function stripLatinNoise(?string $value): ?string
    {
        if (! $value) {
            return null;
        }

        $hasBangla = preg_match('/[\\x{0980}-\\x{09FF}]/u', $value);
        if (! $hasBangla) {
            return $value;
        }

        $withoutLatin = preg_replace('/\b[a-zA-Z]{2,}\b/u', '', $value);
        $cleaned = trim(preg_replace('/\s{2,}/u', ' ', $withoutLatin));

        return $cleaned === '' ? null : $cleaned;
    }

    /**
     * Move a vowel sign that was emitted before its consonant to after it.
     *
     * Applies only where the vowel sign stands at the start of the value or
     * right after whitespace, '/', '-' or ','; the separator is kept and the
     * vowel sign and the following consonant swap places.
     */
    private function reorderLeadingVowel(string $value): string
    {
        // $1 = separator, $2 = vowel sign, $3 = consonant.
        return preg_replace('/(^|\s|\/|\-|\,)([োৌেৈিী])([ক-হড়ঢ়য়৺])/u', '$1$3$2', $value);
    }

    /**
     * Determine the number of pages of a stored PDF.
     *
     * Tries pdfinfo first and falls back to the PDF parser's metadata.
     *
     * @return int|null  a positive page count, or null when the file is
     *                   missing or no positive count could be determined
     */
    public function getPdfTotalPages(SourceFile $sourceFile): ?int
    {
        $absolutePath = Storage::path($sourceFile->stored_path);

        if (file_exists($absolutePath)) {
            $pages = $this->getPdfInfoPageCount($absolutePath);
            if ($pages === null) {
                $pages = $this->getParserPageCount($absolutePath);
            }

            if ($pages !== null && $pages > 0) {
                return $pages;
            }
        }

        return null;
    }

    /**
     * Read the page count from the output of the pdfinfo binary
     * (VM_PDFINFO_PATH, default 'pdfinfo').
     *
     * @return int|null  the number after the "Pages:" line, or null when the
     *                   process fails or no such line is printed
     */
    private function getPdfInfoPageCount(string $filePath): ?int
    {
        $binary = $this->env('VM_PDFINFO_PATH', 'pdfinfo');
        $pageCount = null;

        try {
            $pdfinfoProcess = new Process([$binary, $filePath]);
            $pdfinfoProcess->setTimeout(60);
            $pdfinfoProcess->mustRun();

            if (preg_match('/^Pages:\\s*(\\d+)/mi', $pdfinfoProcess->getOutput(), $found)) {
                $pageCount = (int) $found[1];
            }
        } catch (\Throwable $e) {
            // Best effort: the caller falls back to the PDF parser.
        }

        return $pageCount;
    }

    /**
     * Read the page count from the 'Pages' entry of the PDF parser's details.
     *
     * @return int|null  the page count, or null when parsing fails or the
     *                   entry is absent
     */
    private function getParserPageCount(string $filePath): ?int
    {
        try {
            $details = (new Parser())->parseFile($filePath)->getDetails();

            return isset($details['Pages']) ? (int) $details['Pages'] : null;
        } catch (\Throwable $e) {
            // Best effort: an unreadable PDF simply yields no count.
            return null;
        }
    }

    /**
     * Extract the full text of a PDF, using a file cache for the slow paths.
     *
     * Strategies are tried in this order:
     *  1. pdftotext, when VM_USE_PDFTOTEXT is enabled;
     *  2. OCR, when VM_USE_OCR is enabled;
     *  3. the PHP PDF parser (never cached).
     *
     * Each cached strategy stores its output under storage/app/ocr in a file
     * named after a SHA-1 of the file path, mtime, size and that strategy's
     * own settings. The key is computed per strategy so that pdftotext output
     * is never stored under (or served from) the OCR key when both are enabled.
     *
     * A failing strategy is reported and the next one is tried.
     *
     * @param  string  $filePath  absolute path of the PDF
     * @return string  extracted text
     *
     * @throws \Throwable  whatever the PDF parser throws when it is the last resort and fails
     */
    private function getText(string $filePath): string
    {
        $fingerprint = $filePath.'|'.filemtime($filePath).'|'.filesize($filePath);
        $cachePathFor = static fn (string $salt): string => storage_path('app/ocr/'.sha1($fingerprint.'|'.$salt).'.txt');

        if ($this->env('VM_USE_PDFTOTEXT', false)) {
            $cached = $cachePathFor('pdftotext|'.$this->env('VM_PDFTOTEXT_PATH', 'pdftotext'));
            if (File::exists($cached)) {
                return File::get($cached);
            }

            try {
                $text = $this->pdfToText($filePath);
                File::ensureDirectoryExists(storage_path('app/ocr'));
                File::put($cached, $text);

                return $text;
            } catch (\Throwable $e) {
                // Best effort: log and fall through to the next strategy.
                report($e);
            }
        }

        if ($this->env('VM_USE_OCR', false)) {
            $cached = $cachePathFor(
                'ocr|'.$this->env('VM_OCR_DPI', 300).'|'.$this->env('VM_OCR_PSM', '6').'|'.$this->env('VM_OCR_LANG', 'ben+eng').'|'.$this->env('VM_OCR_OEM', '1')
            );
            if (File::exists($cached)) {
                return File::get($cached);
            }

            try {
                $text = $this->ocrText($filePath);
                File::ensureDirectoryExists(storage_path('app/ocr'));
                File::put($cached, $text);

                return $text;
            } catch (\Throwable $e) {
                // Best effort: log and fall through to the parser.
                report($e);
            }
        }

        $parser = new Parser();
        $pdf = $parser->parseFile($filePath);

        return $pdf->getText();
    }

    /**
     * OCR a single page, preferring the grid/box strategy when enabled.
     *
     * If grid OCR is disabled, or it throws (the exception is reported), the
     * simple whole-page OCR is used instead.
     *
     * @param  string  $filePath  absolute path of the PDF
     * @param  int  $page  page number passed on to the OCR helpers
     * @param  string|null  $psmOverride  page-segmentation mode to use instead of the configured one
     */
    private function ocrPageText(string $filePath, int $page, ?string $psmOverride = null): string
    {
        if (! $this->shouldUseBoxOcr()) {
            return $this->ocrPageTextSimple($filePath, $page, $psmOverride);
        }

        try {
            return $this->ocrPageTextByGrid($filePath, $page, $psmOverride);
        } catch (\Throwable $gridFailure) {
            report($gridFailure);
        }

        return $this->ocrPageTextSimple($filePath, $page, $psmOverride);
    }

    /**
     * OCR one PDF page as a whole image.
     *
     * The page is rendered to PNG with pdftoppm at the configured DPI, each
     * produced image is passed to ocrImageText(), and the texts are joined
     * with newlines. The result is cached under storage/app/ocr, keyed by the
     * file path, mtime, size, page and OCR settings.
     *
     * The temporary render directory is removed in every case, including when
     * rendering or OCR throws.
     *
     * @param  string  $filePath  absolute path of the PDF
     * @param  int  $page  page number passed to pdftoppm as both first and last page
     * @param  string|null  $psmOverride  page-segmentation mode to use instead of VM_OCR_PSM
     * @return string  recognised text
     *
     * @throws \RuntimeException  when pdftoppm produced no image
     * @throws \Throwable  when pdftoppm or the OCR step fails
     */
    private function ocrPageTextSimple(string $filePath, int $page, ?string $psmOverride = null): string
    {
        $pdftoppm = $this->resolveBinary($this->env('VM_PDFTOPPM_PATH', 'pdftoppm'));
        $tessdataDir = $this->resolveBinary($this->env('VM_TESSDATA_DIR', base_path('tools/tessdata')));
        if (! $tessdataDir || ! File::exists($tessdataDir)) {
            $tessdataDir = base_path('tools/tessdata');
        }
        $ocrLang = $this->env('VM_OCR_LANG', 'ben+eng');
        $ocrPsm = $psmOverride !== null ? (string) $psmOverride : (string) $this->env('VM_OCR_PSM', '6');
        $ocrOem = (string) $this->env('VM_OCR_OEM', '1');
        $ocrDpi = (int) $this->env('VM_OCR_DPI', 300);

        $cacheKey = sha1($filePath.'|'.filemtime($filePath).'|'.filesize($filePath).'|page:'.$page.'|simple|'.$ocrDpi.'|'.$ocrPsm.'|'.$ocrLang.'|'.$ocrOem);
        $cached = storage_path('app/ocr/page-'.$cacheKey.'.txt');
        if (File::exists($cached)) {
            return File::get($cached);
        }

        $tempDir = storage_path('app/ocr/'.Str::random(8));
        File::makeDirectory($tempDir, 0775, true, true);
        $imagePrefix = $tempDir.'/page';

        try {
            $convert = new Process([
                $pdftoppm,
                '-r',
                (string) $ocrDpi,
                '-f',
                (string) $page,
                '-l',
                (string) $page,
                '-png',
                $filePath,
                $imagePrefix,
            ]);
            $convert->setWorkingDirectory(base_path());
            $convert->setTimeout(600);
            $convert->mustRun();

            $images = collect(File::glob($imagePrefix.'-*.png'))->sort()->values();
            if ($images->isEmpty()) {
                throw new \RuntimeException('OCR image generation ব্যর্থ হয়েছে।');
            }

            $output = '';
            foreach ($images as $img) {
                $pageText = $this->ocrImageText($img, $ocrLang, $ocrPsm, $ocrOem, $tessdataDir);
                $output .= $pageText."\n";
            }
        } finally {
            // Always drop the rendered images, even when rendering or OCR failed.
            File::deleteDirectory($tempDir);
        }

        File::ensureDirectoryExists(storage_path('app/ocr'));
        File::put($cached, $output);

        return $output;
    }

    private function ocrPageTextByGrid(
        string $filePath,
        int $page,
        ?string $psmOverride = null,
        ?int $gridColsOverride = null,
        ?int $gridRowsOverride = null,
        ?bool $preprocessOverride = null,
        ?int $thresholdOverride = null,
        ?int $dpiOverride = null
    ): string
    {
        $pdftoppm = $this->resolveBinary($this->env('VM_PDFTOPPM_PATH', 'pdftoppm'));
        $tessdataDir = $this->resolveBinary($this->env('VM_TESSDATA_DIR', base_path('tools/tessdata')));
        if (! $tessdataDir || ! File::exists($tessdataDir)) {
            $tessdataDir = base_path('tools/tessdata');
        }
        $ocrLang = $this->env('VM_OCR_LANG', 'ben+eng');
        $ocrPsm = $psmOverride !== null ? (string) $psmOverride : (string) $this->env('VM_OCR_PSM', '6');
        $ocrOem = (string) $this->env('VM_OCR_OEM', '1');
        $ocrDpi = $dpiOverride ?? (int) $this->env('VM_OCR_DPI', 300);

        $gridCols = $gridColsOverride ?? max(1, (int) $this->env('VM_OCR_GRID_COLS', 3));
        $gridRows = $gridRowsOverride ?? max(1, (int) $this->env('VM_OCR_GRID_ROWS', 5));
        $insetPct = (float) $this->env('VM_OCR_BOX_INSET', 0.03);
        $preprocess = $preprocessOverride ?? $this->envBool('VM_OCR_PREPROCESS', true);
        $preprocessMode = (string) $this->env('VM_OCR_PREPROCESS_MODE', 'gd');
        $preprocessThreshold = $thresholdOverride ?? (int) $this->env('VM_OCR_PREPROCESS_THRESHOLD', 60);
        $deskew = (string) $this->env('VM_OCR_DESKEW', '40%');

        $cacheKey = sha1(
            $filePath.'|'.filemtime($filePath).'|'.filesize($filePath)
            .'|page:'.$page.'|grid:'.$gridCols.'x'.$gridRows.'|inset:'.$insetPct
            .'|pre:'.($preprocess ? '1' : '0').'|mode:'.$preprocessMode.'|thr:'.$preprocessThreshold.'|deskew:'.$deskew
            .'|margins:'.implode(',', [
                $this->env('VM_OCR_GRID_MARGIN_TOP', '0'),
                $this->env('VM_OCR_GRID_MARGIN_BOTTOM', '0'),
                $this->env('VM_OCR_GRID_MARGIN_LEFT', '0'),
                $this->env('VM_OCR_GRID_MARGIN_RIGHT', '0'),
            ])
            .'|'.$ocrDpi.'|'.$ocrPsm.'|'.$ocrLang.'|'.$ocrOem
        );
        $cached = storage_path('app/ocr/page-grid-'.$cacheKey.'.txt');
        if (File::exists($cached)) {
            return File::get($cached);
        }

        $tempDir = storage_path('app/ocr/'.Str::random(8));
        File::makeDirectory($tempDir, 0775, true, true);
        $imagePrefix = $tempDir.'/page';

        $convert = new Process([
            $pdftoppm,
            '-r',
            (string) $ocrDpi,
            '-f',
            (string) $page,
            '-l',
            (string) $page,
            '-png',
            $filePath,
            $imagePrefix,
        ]);
        $convert->setWorkingDirectory(base_path());
        $convert->setTimeout(600);
        $convert->mustRun();

        $images = collect(File::glob($imagePrefix.'-*.png'))->sort()->values();
        if ($images->isEmpty()) {
            throw new \RuntimeException('OCR image generation ব্যর্থ হয়েছে।');
        }

        $pageImage = $images->first();
        $dimensions = @getimagesize($pageImage);
        if (! $dimensions) {
            File::deleteDirectory($tempDir);
            throw new \RuntimeException('OCR image size read failed.');
        }

        [$width, $height] = $dimensions;
        $marginTop = $this->parseMarginValue($this->env('VM_OCR_GRID_MARGIN_TOP', '0.19'), (int) $height);
        $marginBottom = $this->parseMarginValue($this->env('VM_OCR_GRID_MARGIN_BOTTOM', '0.14'), (int) $height);
        $marginLeft = $this->parseMarginValue($this->env('VM_OCR_GRID_MARGIN_LEFT', '0.08'), (int) $width);
        $marginRight = $this->parseMarginValue($this->env('VM_OCR_GRID_MARGIN_RIGHT', '0.08'), (int) $width);

        $gridX = max(0, min((int) $width - 1, $marginLeft));
        $gridY = max(0, min((int) $height - 1, $marginTop));
        $gridWidth = max(1, (int) $width - $marginLeft - $marginRight);
        $gridHeight = max(1, (int) $height - $marginTop - $marginBottom);

        $boxes = $this->buildOcrGrid($gridWidth, $gridHeight, $gridCols, $gridRows, $insetPct, $gridX, $gridY);
        if (empty($boxes)) {
            File::deleteDirectory($tempDir);
            throw new \RuntimeException('OCR grid calculation failed.');
        }

        $output = '';
        if ($this->canUseGd()) {
            $pageResource = @imagecreatefrompng($pageImage);
            if (! $pageResource) {
                File::deleteDirectory($tempDir);
                throw new \RuntimeException('OCR image load failed.');
            }

            if ($this->envBool('VM_OCR_GRID_DETECT_LINES', true)) {
                $detectedBoxes = $this->buildOcrGridFromDetectedLines(
                    $pageResource,
                    (int) $width,
                    (int) $height,
                    $gridRows,
                    $gridCols,
                    $insetPct
                );
                if (! empty($detectedBoxes)) {
                    $boxes = $detectedBoxes;
                }
            }

            foreach ($boxes as $index => $box) {
                $crop = imagecrop($pageResource, $box);
                if (! $crop) {
                    continue;
                }

                $boxPath = $tempDir.'/box-'.$index.'.png';
                imagepng($crop, $boxPath);
                imagedestroy($crop);

                $ocrImage = $boxPath;
                $pad = (int) $this->env('VM_OCR_BOX_PAD', 0);
                if ($pad > 0) {
                    $ocrImage = $this->padOcrImage($ocrImage, $tempDir, $index, $pad);
                }
                if ($preprocess) {
                    $ocrImage = $this->preprocessImageFile($ocrImage, $tempDir, $index, $preprocessMode, $preprocessThreshold, $deskew);
                }
                $boxText = $this->ocrImageText($ocrImage, $ocrLang, $ocrPsm, $ocrOem, $tessdataDir);
                if ($this->shouldRunLineFallback($boxText)) {
                    $lineText = $this->ocrBoxLines($ocrImage, $tempDir, $index, $ocrLang, $ocrOem, $tessdataDir);
                    if ($lineText !== '') {
                        $boxText = $this->mergeOcrLines($boxText, $lineText);
                    }
                }
                if ($ocrImage !== $boxPath && $this->shouldRunLineFallback($boxText)) {
                    $rawLineText = $this->ocrBoxLines($boxPath, $tempDir, $index, $ocrLang, $ocrOem, $tessdataDir);
                    if ($rawLineText !== '') {
                        $boxText = $this->mergeOcrLines($boxText, $rawLineText);
                    }
                }
                if ($this->shouldRunLineFallback($boxText)) {
                    foreach ($this->getFallbackPsms() as $fallbackPsm) {
                        $fallbackText = $this->ocrImageText($ocrImage, $ocrLang, $fallbackPsm, $ocrOem, $tessdataDir);
                        if ($fallbackText !== '') {
                            $boxText = $this->mergeOcrLines($boxText, $fallbackText);
                        }
                    }
                }
                if ($this->envBool('VM_OCR_BAND_FALLBACK', true)
                    && (! preg_match('/পিতা/u', $boxText) || ! preg_match('/মাতা/u', $boxText))) {
                    $bandText = $this->ocrBoxBand(
                        $ocrImage,
                        $tempDir,
                        $index,
                        $ocrLang,
                        $ocrOem,
                        $tessdataDir,
                        $this->getFallbackPsms()
                    );
                    if ($bandText !== '') {
                        $boxText = $this->mergeOcrLines($boxText, $bandText);
                    }
                }
                if ($this->shouldRunLineFallback($boxText)) {
                    $scaleFallback = (float) $this->env('VM_OCR_BOX_SCALE_FALLBACK', 1.8);
                    if ($scaleFallback > 1.01) {
                        $scaledPath = $this->scaleOcrImage($boxPath, $tempDir, $index + 9000, $scaleFallback);
                        $scaledText = $this->ocrImageText($scaledPath, $ocrLang, $ocrPsm, $ocrOem, $tessdataDir);
                        if ($scaledText === '' && ! empty($this->getFallbackPsms())) {
                            foreach ($this->getFallbackPsms() as $fallbackPsm) {
                                $scaledText = $this->ocrImageText($scaledPath, $ocrLang, $fallbackPsm, $ocrOem, $tessdataDir);
                                if ($scaledText !== '') {
                                    break;
                                }
                            }
                        }
                        if ($scaledText !== '') {
                            $boxText = $this->mergeOcrLines($boxText, $scaledText);
                        }
                    }
                }
                if ($this->envBool('VM_OCR_TOPLINE_FALLBACK', true) && ! preg_match('/নাম[:ঃ]?\s*/u', $boxText)) {
                    $topText = $this->ocrTopLine($ocrImage, $tempDir, $index, $ocrLang, $ocrOem, $tessdataDir);
                    if ($topText) {
                        $boxText = trim($topText)."\n".$boxText;
                    }
                }
                $output .= $boxText."\n<<<VM_BOX>>>\n";
            }

            imagedestroy($pageResource);
        } else {
            foreach ($boxes as $index => $box) {
                $boxPrefix = $tempDir.'/box-'.$index;
                $this->renderPdftoppmCrop($pdftoppm, $filePath, $page, $ocrDpi, $box, $boxPrefix);
                $boxImage = collect(File::glob($boxPrefix.'-*.png'))->sort()->values()->first();
                if (! $boxImage) {
                    continue;
                }

                $ocrImage = $preprocess ? $this->preprocessImageFile($boxImage, $tempDir, $index, $preprocessMode, $preprocessThreshold, $deskew) : $boxImage;
                $output .= $this->ocrImageText($ocrImage, $ocrLang, $ocrPsm, $ocrOem, $tessdataDir)."\n<<<VM_BOX>>>\n";
            }
        }

        File::ensureDirectoryExists(storage_path('app/ocr'));
        File::put($cached, $output);
        File::deleteDirectory($tempDir);

        return $output;
    }

    /**
     * Splits a rectangle into a uniform rows x cols grid of OCR boxes.
     *
     * Cell size is the integer division of the region by the grid size; the last
     * column/row absorbs any remainder. Each cell is then shrunk on every side
     * by $insetPct (clamped to 0..0.2) of its own size.
     *
     * @param int   $width    Width of the region to split.
     * @param int   $height   Height of the region to split.
     * @param int   $cols     Number of columns (values below 1 are treated as 1).
     * @param int   $rows     Number of rows (values below 1 are treated as 1).
     * @param float $insetPct Fraction of each cell trimmed from every edge.
     * @param int   $offsetX  Added to every box's x coordinate.
     * @param int   $offsetY  Added to every box's y coordinate.
     *
     * @return array<int, array{x:int,y:int,width:int,height:int}> Row-major list; empty when a cell would be zero-sized.
     */
    private function buildOcrGrid(
        int $width,
        int $height,
        int $cols,
        int $rows,
        float $insetPct,
        int $offsetX = 0,
        int $offsetY = 0
    ): array
    {
        $columnCount = max(1, $cols);
        $rowCount = max(1, $rows);
        $inset = max(0.0, min(0.2, $insetPct));

        $cellWidth = intdiv($width, $columnCount);
        $cellHeight = intdiv($height, $rowCount);
        if ($cellWidth <= 0 || $cellHeight <= 0) {
            return [];
        }

        $grid = [];
        for ($rowIndex = 0; $rowIndex < $rowCount; $rowIndex++) {
            $top = $rowIndex * $cellHeight;
            // The final row takes whatever height is left over after integer division.
            $rawHeight = $rowIndex === $rowCount - 1 ? $height - $top : $cellHeight;
            $marginY = (int) floor($rawHeight * $inset);

            for ($colIndex = 0; $colIndex < $columnCount; $colIndex++) {
                $left = $colIndex * $cellWidth;
                // Same remainder handling for the final column.
                $rawWidth = $colIndex === $columnCount - 1 ? $width - $left : $cellWidth;
                $marginX = (int) floor($rawWidth * $inset);

                $grid[] = [
                    'x' => $offsetX + $left + $marginX,
                    'y' => $offsetY + $top + $marginY,
                    'width' => max(1, $rawWidth - (2 * $marginX)),
                    'height' => max(1, $rawHeight - (2 * $marginY)),
                ];
            }
        }

        return $grid;
    }

    /**
     * Builds an OCR grid from ruling lines detected in the image itself.
     *
     * Detects horizontal and vertical lines, keeps $rows + 1 and $cols + 1 of
     * them via selectGridLines(), and turns the kept lines into boxes.
     *
     * @param mixed $image    GD image handle passed to the line detectors.
     * @param int   $width    Image width in pixels.
     * @param int   $height   Image height in pixels.
     * @param int   $rows     Expected number of grid rows.
     * @param int   $cols     Expected number of grid columns.
     * @param float $insetPct Inset fraction forwarded to buildOcrGridFromLines().
     *
     * @return array List of boxes, or an empty array when too few lines are found.
     */
    private function buildOcrGridFromDetectedLines(
        $image,
        int $width,
        int $height,
        int $rows,
        int $cols,
        float $insetPct
    ): array
    {
        $rowLinesNeeded = $rows + 1;
        $colLinesNeeded = $cols + 1;

        $rowLines = $this->detectHorizontalLines($image, $width, $height);
        $colLines = $this->detectVerticalLines($image, $width, $height);

        $enoughLines = count($rowLines) >= $rowLinesNeeded && count($colLines) >= $colLinesNeeded;
        if (! $enoughLines) {
            return [];
        }

        $rowLines = $this->selectGridLines($rowLines, $rowLinesNeeded);
        $colLines = $this->selectGridLines($colLines, $colLinesNeeded);

        return (empty($rowLines) || empty($colLines))
            ? []
            : $this->buildOcrGridFromLines($rowLines, $colLines, $insetPct);
    }

    /**
     * Converts sorted grid-line positions into OCR boxes.
     *
     * Each pair of neighbouring lines delimits one row (horizontal) or column
     * (vertical). Spans of one pixel or less are skipped; the rest are shrunk
     * on each side by $insetPct (clamped to 0..0.2).
     *
     * @param array $horizontal Y positions of the horizontal lines (0-indexed list).
     * @param array $vertical   X positions of the vertical lines (0-indexed list).
     * @param float $insetPct   Fraction of each cell trimmed from every edge.
     *
     * @return array<int, array{x:int,y:int,width:int,height:int}> Row-major list of boxes.
     */
    private function buildOcrGridFromLines(array $horizontal, array $vertical, float $insetPct): array
    {
        $rowCount = count($horizontal) - 1;
        $colCount = count($vertical) - 1;
        if ($rowCount <= 0 || $colCount <= 0) {
            return [];
        }

        $inset = max(0.0, min(0.2, $insetPct));

        // Turns consecutive edges into [start, size] pairs with the inset applied,
        // dropping degenerate spans.
        $toSpans = static function (array $edges, int $count) use ($inset): array {
            $spans = [];
            for ($i = 0; $i < $count; $i++) {
                $start = (int) $edges[$i];
                $size = (int) ($edges[$i + 1] - $start);
                if ($size <= 1) {
                    continue;
                }

                $margin = (int) floor($size * $inset);
                $spans[] = [$start + $margin, max(1, $size - (2 * $margin))];
            }

            return $spans;
        };

        $rowSpans = $toSpans($horizontal, $rowCount);
        $colSpans = $toSpans($vertical, $colCount);

        $grid = [];
        foreach ($rowSpans as [$top, $boxHeight]) {
            foreach ($colSpans as [$left, $boxWidth]) {
                $grid[] = [
                    'x' => $left,
                    'y' => $top,
                    'width' => $boxWidth,
                    'height' => $boxHeight,
                ];
            }
        }

        return $grid;
    }

    /**
     * Finds the Y positions of rows that look like horizontal ruling lines.
     *
     * Every row is sampled every VM_OCR_LINE_SAMPLE_STEP pixels. A sample is
     * "dark" when the sum of its R, G and B channels is below
     * VM_OCR_LINE_DARK_THRESHOLD; a row qualifies when the share of dark samples
     * reaches VM_OCR_LINE_DARK_RATIO. Qualifying rows are merged by
     * clusterLinePositions().
     *
     * NOTE(review): the channel extraction assumes a truecolor image; confirm
     * the caller never passes a palette image.
     *
     * @param mixed $image  GD image handle.
     * @param int   $width  Image width in pixels.
     * @param int   $height Image height in pixels.
     *
     * @return array Clustered Y positions.
     */
    private function detectHorizontalLines($image, int $width, int $height): array
    {
        $stride = max(1, (int) $this->env('VM_OCR_LINE_SAMPLE_STEP', 3));
        $minDarkShare = (float) $this->env('VM_OCR_LINE_DARK_RATIO', 0.6);
        $darkCeiling = (int) $this->env('VM_OCR_LINE_DARK_THRESHOLD', 120);
        $darkRows = [];

        for ($row = 0; $row < $height; $row++) {
            $darkHits = 0;
            $sampled = 0;

            for ($col = 0; $col < $width; $col += $stride, $sampled++) {
                $pixel = imagecolorat($image, $col, $row);
                $channelSum = (($pixel >> 16) & 0xFF) + (($pixel >> 8) & 0xFF) + ($pixel & 0xFF);
                if ($channelSum < $darkCeiling) {
                    $darkHits++;
                }
            }

            if ($sampled === 0) {
                continue;
            }

            if (($darkHits / $sampled) >= $minDarkShare) {
                $darkRows[] = $row;
            }
        }

        return $this->clusterLinePositions($darkRows, (int) $this->env('VM_OCR_LINE_CLUSTER_GAP', 12));
    }

    /**
     * Finds the X positions of columns that look like vertical ruling lines.
     *
     * Every column is sampled every VM_OCR_LINE_SAMPLE_STEP pixels. A sample is
     * "dark" when the sum of its R, G and B channels is below
     * VM_OCR_LINE_DARK_THRESHOLD; a column qualifies when the share of dark
     * samples reaches VM_OCR_LINE_DARK_RATIO. Qualifying columns are merged by
     * clusterLinePositions().
     *
     * NOTE(review): the channel extraction assumes a truecolor image; confirm
     * the caller never passes a palette image.
     *
     * @param mixed $image  GD image handle.
     * @param int   $width  Image width in pixels.
     * @param int   $height Image height in pixels.
     *
     * @return array Clustered X positions.
     */
    private function detectVerticalLines($image, int $width, int $height): array
    {
        $stride = max(1, (int) $this->env('VM_OCR_LINE_SAMPLE_STEP', 3));
        $minDarkShare = (float) $this->env('VM_OCR_LINE_DARK_RATIO', 0.6);
        $darkCeiling = (int) $this->env('VM_OCR_LINE_DARK_THRESHOLD', 120);
        $darkColumns = [];

        for ($col = 0; $col < $width; $col++) {
            $darkHits = 0;
            $sampled = 0;

            for ($row = 0; $row < $height; $row += $stride, $sampled++) {
                $pixel = imagecolorat($image, $col, $row);
                $channelSum = (($pixel >> 16) & 0xFF) + (($pixel >> 8) & 0xFF) + ($pixel & 0xFF);
                if ($channelSum < $darkCeiling) {
                    $darkHits++;
                }
            }

            if ($sampled === 0) {
                continue;
            }

            if (($darkHits / $sampled) >= $minDarkShare) {
                $darkColumns[] = $col;
            }
        }

        return $this->clusterLinePositions($darkColumns, (int) $this->env('VM_OCR_LINE_CLUSTER_GAP', 12));
    }

    /**
     * Collapses runs of nearby positions into a single representative each.
     *
     * Positions are sorted; a position joins the current run when it is at most
     * $gap (minimum 1) away from the previous position. Each run is reported as
     * the rounded mean of its members.
     *
     * @param array $lines Pixel positions (any order).
     * @param int   $gap   Maximum distance between neighbours of one run.
     *
     * @return int[] One centre per run, ascending.
     */
    private function clusterLinePositions(array $lines, int $gap): array
    {
        if ($lines === []) {
            return [];
        }

        sort($lines);
        $maxGap = max(1, $gap);

        $centres = [];
        $runSum = $lines[0];
        $runSize = 1;
        $previous = $lines[0];

        foreach (array_slice($lines, 1) as $position) {
            if (($position - $previous) > $maxGap) {
                // Gap too wide: close the current run and start a new one.
                $centres[] = (int) round($runSum / $runSize);
                $runSum = 0;
                $runSize = 0;
            }

            $runSum += $position;
            $runSize++;
            $previous = $position;
        }

        $centres[] = (int) round($runSum / $runSize);

        return $centres;
    }

    /**
     * Picks $count consecutive lines (in sorted order) covering the widest span.
     *
     * Slides a window of $count entries over the sorted positions and keeps the
     * first window whose last-minus-first distance is largest.
     *
     * @param array $lines Candidate line positions.
     * @param int   $count Number of lines to keep.
     *
     * @return array The chosen positions, or an empty array when fewer than $count exist.
     */
    private function selectGridLines(array $lines, int $count): array
    {
        $available = count($lines);
        if ($available < $count) {
            return [];
        }

        sort($lines);

        $winner = [];
        $widest = -1;
        $lastStart = $available - $count;

        for ($start = 0; $start <= $lastStart; $start++) {
            $span = $lines[$start + $count - 1] - $lines[$start];
            // Strict comparison keeps the earliest window on ties.
            if ($span > $widest) {
                $widest = $span;
                $winner = array_slice($lines, $start, $count);
            }
        }

        return $winner;
    }

    /**
     * Reports whether box-based OCR is enabled (VM_OCR_USE_BOXES, default true).
     */
    private function shouldUseBoxOcr(): bool
    {
        $boxOcrEnabled = $this->envBool('VM_OCR_USE_BOXES', true);

        return $boxOcrEnabled;
    }

    /**
     * Heuristic: does this OCR line plausibly hold a person's name?
     *
     * Rejects empty lines, lines starting with a known label, lines mentioning
     * another field (voter number, address, birth date, occupation), bare
     * location tokens, lines with fewer than two letters, and lines with more
     * than four ASCII or Bengali digits.
     *
     * @param string $line A single OCR line.
     */
    private function looksLikeNameLine(string $line): bool
    {
        if ($line === '') {
            return false;
        }

        if ($this->lineStartsWithLabel($line)) {
            return false;
        }

        $mentionsOtherField = preg_match('/ভোটার\s*নং|ঠিকানা|জন্ম\s*তারিখ|জন্ম|পেশা/u', $line);
        if ($mentionsOtherField || $this->isLocationToken($line)) {
            return false;
        }

        if (preg_match_all('/\p{L}/u', $line) < 2) {
            return false;
        }

        return preg_match_all('/[0-9\x{09E6}-\x{09EF}]/u', $line) <= 4;
    }

    /**
     * Tells whether the value is a single administrative/location keyword.
     *
     * Punctuation and the visarga sign are stripped first; the remainder must be
     * exactly one of the listed words, with no whitespace.
     *
     * @param string $value Text to test.
     */
    private function isLocationToken(string $value): bool
    {
        $candidate = trim($value);
        if ($candidate === '') {
            return false;
        }

        $candidate = trim(preg_replace('/[[:punct:]ঃ]+/u', '', $candidate));
        if ($candidate === '') {
            return false;
        }

        // Anything containing whitespace is more than a single token.
        if (preg_match('/\s/u', $candidate)) {
            return false;
        }

        $matched = preg_match('/^(সদর|উপজেলা|থানা|জেলা|গ্রাম|ইউনিয়ন|ইউনিয়ন|ওয়ার্ড|ওয়ার্ড|পৌরসভা|সিটি|কর্পোরেশন|এলাকা)$/u', $candidate);

        return $matched === 1;
    }

    /**
     * Converts a margin setting into a pixel count.
     *
     * Accepted forms:
     *  - null / empty / whitespace: 0
     *  - "N%": N percent of $total, with N clamped to 0..30
     *  - a number greater than 1: taken as pixels (rounded)
     *  - any other number: a fraction of $total, clamped to 0..0.3
     *
     * @param mixed $raw   Raw configuration value.
     * @param int   $total Dimension the margin is relative to.
     *
     * @return int Margin in pixels.
     */
    private function parseMarginValue($raw, int $total): int
    {
        $text = trim((string) $raw);
        if ($text === '') {
            return 0;
        }

        if (str_ends_with($text, '%')) {
            $percent = max(0.0, min(30.0, (float) rtrim($text, '%')));

            return (int) round($total * ($percent / 100));
        }

        $amount = (float) $text;

        return $amount > 1
            ? (int) round($amount)
            : (int) round($total * max(0.0, min(0.3, $amount)));
    }

    /**
     * Reads a setting from the "votermaster" configuration namespace.
     *
     * @param string $key     Setting name appended after "votermaster.".
     * @param mixed  $default Value passed to config() as the fallback.
     *
     * @return mixed
     */
    private function env(string $key, $default = null)
    {
        $configKey = 'votermaster.'.$key;

        return config($configKey, $default);
    }

    /**
     * Reads a setting and interprets it as a boolean.
     *
     * Real booleans are returned unchanged; other values go through
     * FILTER_VALIDATE_BOOLEAN. A missing or unrecognised value yields $default.
     *
     * @param string $key     Setting name (see env()).
     * @param bool   $default Result when the setting is absent or not boolean-like.
     */
    private function envBool(string $key, bool $default = false): bool
    {
        $raw = $this->env($key);

        if (is_bool($raw)) {
            return $raw;
        }

        if ($raw === null) {
            return $default;
        }

        return filter_var($raw, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE) ?? $default;
    }

    /**
     * Checks that the GD functions probed here are all available.
     */
    private function canUseGd(): bool
    {
        foreach (['imagecreatefrompng', 'imagecrop', 'imagepng', 'imagefilter'] as $required) {
            if (! function_exists($required)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Adds a white border of $padding pixels around a PNG and saves the result.
     *
     * @param string $sourcePath PNG to pad.
     * @param string $tempDir    Directory that receives "box-{index}-pad.png".
     * @param int    $index      Number used in the output file name.
     * @param int    $padding    Border width in pixels; values <= 0 disable padding.
     *
     * @return string Path of the padded image, or $sourcePath when padding is
     *                skipped or the image cannot be read or written.
     */
    private function padOcrImage(string $sourcePath, string $tempDir, int $index, int $padding): string
    {
        if ($padding <= 0 || ! $this->canUseGd()) {
            return $sourcePath;
        }

        $original = @imagecreatefrompng($sourcePath);
        if (! $original) {
            return $sourcePath;
        }

        $srcWidth = imagesx($original);
        $srcHeight = imagesy($original);
        if ($srcWidth <= 0 || $srcHeight <= 0) {
            imagedestroy($original);

            return $sourcePath;
        }

        // White canvas, larger by the padding on every side, with the source centred on it.
        $padded = imagecreatetruecolor($srcWidth + ($padding * 2), $srcHeight + ($padding * 2));
        imagefill($padded, 0, 0, imagecolorallocate($padded, 255, 255, 255));
        imagecopy($padded, $original, $padding, $padding, 0, 0, $srcWidth, $srcHeight);
        imagedestroy($original);

        $target = $tempDir.'/box-'.$index.'-pad.png';
        imagepng($padded, $target);
        imagedestroy($padded);

        if (File::exists($target)) {
            return $target;
        }

        return $sourcePath;
    }

    /**
     * OCRs only the top strip of a box image.
     *
     * The strip height is VM_OCR_TOPLINE_RATIO of the image (clamped to
     * 0.15..0.6) and is recognised with VM_OCR_TOPLINE_PSM (default "7").
     *
     * @param string $sourcePath  PNG of the whole box.
     * @param string $tempDir     Directory that receives "box-{index}-top.png".
     * @param int    $index       Number used in the output file name.
     * @param string $lang        Tesseract language string.
     * @param string $oem         Tesseract OCR engine mode.
     * @param string $tessdataDir Tesseract data directory.
     *
     * @return string|null Recognised text, or null when GD is unavailable or the strip cannot be produced.
     */
    private function ocrTopLine(string $sourcePath, string $tempDir, int $index, string $lang, string $oem, string $tessdataDir): ?string
    {
        if (! $this->canUseGd()) {
            return null;
        }

        $stripShare = max(0.15, min(0.6, (float) $this->env('VM_OCR_TOPLINE_RATIO', 0.35)));
        $stripPsm = (string) $this->env('VM_OCR_TOPLINE_PSM', '7');

        $box = @imagecreatefrompng($sourcePath);
        if (! $box) {
            return null;
        }

        $boxWidth = imagesx($box);
        $boxHeight = imagesy($box);
        if ($boxWidth <= 0 || $boxHeight <= 0) {
            imagedestroy($box);

            return null;
        }

        $strip = imagecrop($box, [
            'x' => 0,
            'y' => 0,
            'width' => $boxWidth,
            'height' => max(1, (int) floor($boxHeight * $stripShare)),
        ]);
        imagedestroy($box);
        if (! $strip) {
            return null;
        }

        $stripPath = $tempDir.'/box-'.$index.'-top.png';
        imagepng($strip, $stripPath);
        imagedestroy($strip);

        return File::exists($stripPath)
            ? $this->ocrImageText($stripPath, $lang, $stripPsm, $oem, $tessdataDir)
            : null;
    }
    /**
     * Decides whether more OCR fallbacks should be tried for a box.
     *
     * Fallbacks are wanted while any of the four key fields (name, voter number,
     * father, mother) still lacks a recognisable value, unless the feature is
     * switched off with VM_OCR_LINE_FALLBACK.
     *
     * The former trailing "count the tokens" step was removed: it only ran when
     * all four label checks had passed, which already implies all four tokens
     * are present, so it could never change the result.
     *
     * @param string $boxText OCR text gathered for the box so far.
     *
     * @return bool True when another fallback pass should run.
     */
    private function shouldRunLineFallback(string $boxText): bool
    {
        if (! $this->envBool('VM_OCR_LINE_FALLBACK', true)) {
            return false;
        }

        $hasName = $this->labelHasLettersValue($boxText, 'নাম');
        $hasVoter = $this->labelHasVoterNoValue($boxText);
        $hasFather = $this->labelHasLettersValue($boxText, 'পিতা');
        $hasMother = $this->labelHasLettersValue($boxText, 'মাতা');

        return ! ($hasName && $hasVoter && $hasFather && $hasMother);
    }


    /**
     * Checks that a label is followed by something that looks like a word.
     *
     * After the label, optional whitespace and an optional separator
     * (":", "：" or "ঃ"), the value must start with a letter followed by at least
     * one more letter or combining mark.
     *
     * Marks are accepted because Bengali vowel signs are Unicode category M,
     * not L: requiring two consecutive \p{L} wrongly reported values such as
     * "মোঃ" as missing.
     *
     * @param string $text  OCR text to search.
     * @param string $label Field label; it is escaped before use in the pattern.
     */
    private function labelHasLettersValue(string $text, string $label): bool
    {
        $pattern = '/'.preg_quote($label, '/').'\s*[:：ঃ]?\s*\p{L}[\p{L}\p{M}]+/u';

        return preg_match($pattern, $text) === 1;
    }

    /**
     * Checks that the voter-number label is followed by at least ten digits.
     *
     * Bengali and ASCII digits both count, and whitespace between digits is allowed.
     *
     * @param string $text OCR text to search.
     */
    private function labelHasVoterNoValue(string $text): bool
    {
        $voterNoPattern = '/ভোটার\s*নং[:：ঃ]?\s*(?:[০-৯0-9]\s*){10,}/u';
        $found = preg_match($voterNoPattern, $text);

        return $found === 1;
    }


    /**
     * OCRs a box image strip by strip and returns the lines joined by "\n".
     *
     * The box is cut into VM_OCR_LINE_ROWS overlapping horizontal strips (at
     * least 4), each recognised separately. When shouldRetryLineOcr() finds the
     * result lacking, a second pass runs with more rows, more overlap and a
     * larger scale, and its new lines are appended.
     *
     * @param string $sourcePath  PNG of the box.
     * @param string $tempDir     Directory for intermediate strip images.
     * @param int    $index       Box index used in temp file names.
     * @param string $lang        Tesseract language string.
     * @param string $oem         Tesseract OCR engine mode.
     * @param string $tessdataDir Tesseract data directory.
     *
     * @return string Recognised lines, or '' when GD is unavailable or the image cannot be read.
     */
    private function ocrBoxLines(string $sourcePath, string $tempDir, int $index, string $lang, string $oem, string $tessdataDir): string
    {
        if (! $this->canUseGd()) {
            return '';
        }

        $rows = max(4, (int) $this->env('VM_OCR_LINE_ROWS', 7));
        $overlap = max(0.0, min(0.4, (float) $this->env('VM_OCR_LINE_OVERLAP', 0.15)));
        $psm = (string) $this->env('VM_OCR_LINE_PSM', '7');

        // Fallback page-segmentation modes: the configured list plus "13",
        // without blanks, duplicates or the primary mode.
        $psmCandidates = array_map('trim', explode(',', (string) $this->env('VM_OCR_LINE_PSM_FALLBACK', '6')));
        $psmCandidates = array_filter($psmCandidates);
        $psmCandidates[] = '13';
        $psmCandidates = array_filter($psmCandidates, fn ($value) => $value !== '' && $value !== $psm);
        $fallbackPsms = array_values(array_unique($psmCandidates));

        $linePreprocess = $this->envBool('VM_OCR_LINE_PREPROCESS', true);
        $lineThreshold = (int) $this->env('VM_OCR_LINE_THRESHOLD', (int) $this->env('VM_OCR_PREPROCESS_THRESHOLD', 60));
        $lineMode = (string) $this->env('VM_OCR_LINE_PREPROCESS_MODE', (string) $this->env('VM_OCR_PREPROCESS_MODE', 'gd'));
        $lineScale = (float) $this->env('VM_OCR_LINE_SCALE', 1.0);

        $box = @imagecreatefrompng($sourcePath);
        if (! $box) {
            return '';
        }

        $boxWidth = imagesx($box);
        $boxHeight = imagesy($box);
        if ($boxWidth <= 0 || $boxHeight <= 0) {
            imagedestroy($box);

            return '';
        }

        $firstPass = $this->collectLineTexts(
            $box,
            $boxWidth,
            $boxHeight,
            $rows,
            $overlap,
            $tempDir,
            $index,
            $lang,
            $oem,
            $tessdataDir,
            $linePreprocess,
            $lineThreshold,
            $lineMode,
            $psm,
            $fallbackPsms,
            'a',
            $lineScale
        );

        if (! $this->shouldRetryLineOcr(implode("\n", $firstPass))) {
            imagedestroy($box);

            return implode("\n", $firstPass);
        }

        // Second pass with denser, more overlapping and larger strips.
        $retryRows = max($rows + 1, (int) $this->env('VM_OCR_LINE_ROWS_FALLBACK', $rows + 3));
        $retryOverlap = (float) $this->env('VM_OCR_LINE_OVERLAP_FALLBACK', min(0.4, $overlap + 0.1));
        $retryOverlap = max(0.0, min(0.45, $retryOverlap));
        $retryThreshold = (int) $this->env('VM_OCR_LINE_THRESHOLD_FALLBACK', (int) $this->env('VM_OCR_PREPROCESS_THRESHOLD_FALLBACK', $lineThreshold));
        $retryScale = (float) $this->env('VM_OCR_LINE_SCALE_FALLBACK', max(1.2, $lineScale + 0.8));

        $secondPass = $this->collectLineTexts(
            $box,
            $boxWidth,
            $boxHeight,
            $retryRows,
            $retryOverlap,
            $tempDir,
            $index,
            $lang,
            $oem,
            $tessdataDir,
            $linePreprocess,
            $retryThreshold,
            $lineMode,
            $psm,
            $fallbackPsms,
            'b',
            $retryScale
        );
        $combined = $this->mergeLineTexts($firstPass, $secondPass);

        imagedestroy($box);

        return implode("\n", $combined);
    }

    /**
     * Validates an OCR-extracted person name.
     *
     * Returns the trimmed value, or null when it is empty, mentions another
     * field (voter number, address, birth date, occupation), is or starts with a
     * location keyword, is only a father/mother label, ends with a colon or
     * visarga, has fewer than two letters, or has more than four digits.
     *
     * @param string|null $value Raw candidate name.
     *
     * @return string|null The trimmed name, or null when rejected.
     */
    private function sanitizePersonName(?string $value): ?string
    {
        if (! $value) {
            return null;
        }

        $value = trim($value);
        if ($value === '') {
            return null;
        }

        if (preg_match('/ভোটার\s*নং|ঠিকানা|জন্ম\s*তারিখ|জন্ম|পেশা/u', $value)) {
            return null;
        }

        if ($this->isLocationToken($value)) {
            return null;
        }

        // Reject values that START with a whole location word. A negative
        // lookahead is used instead of \b because Bengali vowel signs are
        // combining marks, not word characters: \b never matched after words
        // ending in a vowel sign (e.g. "জেলা ...") and wrongly matched inside
        // names such as "সদরুল".
        if (preg_match('/^(সদর|উপজেলা|থানা|জেলা|গ্রাম|ইউনিয়ন|ইউনিয়ন|ওয়ার্ড|ওয়ার্ড|পৌরসভা|সিটি|কর্পোরেশন|এলাকা)(?![\p{L}\p{M}\p{N}])/u', $value)) {
            return null;
        }

        if (preg_match('/^(পিতা|পতা|মাতা|মতা)[:ঃ]?$/u', $value)) {
            return null;
        }

        // A trailing separator means only a label was captured, not a value.
        if (preg_match('/[:ঃ]\s*$/u', $value)) {
            return null;
        }

        $letters = preg_match_all('/\p{L}/u', $value);
        if ($letters < 2) {
            return null;
        }

        $digits = preg_match_all('/[0-9\x{09E6}-\x{09EF}]/u', $value);
        if ($digits > 4) {
            return null;
        }

        return $value;
    }
    /**
     * Cuts an image into overlapping horizontal strips and OCRs each one.
     *
     * Each strip is written to $tempDir, optionally upscaled and preprocessed,
     * then recognised with $psm; when that yields nothing the fallback modes are
     * tried in order. Whitespace runs are collapsed and empty results dropped.
     *
     * @param mixed  $image          GD image handle of the box.
     * @param int    $width          Image width.
     * @param int    $height         Image height.
     * @param int    $rows           Number of strips.
     * @param float  $overlap        Fraction of a strip's height added above and below it.
     * @param string $tempDir        Directory for strip images.
     * @param int    $index          Box index used in file names.
     * @param string $lang           Tesseract language string.
     * @param string $oem            Tesseract OCR engine mode.
     * @param string $tessdataDir    Tesseract data directory.
     * @param bool   $linePreprocess Whether to preprocess each strip.
     * @param int    $lineThreshold  Threshold passed to preprocessing.
     * @param string $lineMode       Preprocessing mode.
     * @param string $psm            Primary page-segmentation mode.
     * @param array  $fallbackPsms   Modes tried when the primary yields no text.
     * @param string $suffix         Pass identifier used in file names ('b' shifts the derived index by 50).
     * @param float  $scale          Upscale factor; applied only above 1.01.
     *
     * @return string[] Non-empty recognised lines, top to bottom.
     */
    private function collectLineTexts(
        $image,
        int $width,
        int $height,
        int $rows,
        float $overlap,
        string $tempDir,
        int $index,
        string $lang,
        string $oem,
        string $tessdataDir,
        bool $linePreprocess,
        int $lineThreshold,
        string $lineMode,
        string $psm,
        array $fallbackPsms,
        string $suffix,
        float $scale
    ): array {
        $stripHeight = (int) ceil($height / max(1, $rows));
        $fullStripHeight = (int) ceil($stripHeight + (2 * $stripHeight * $overlap));
        $indexShift = $suffix === 'b' ? 50 : 0;
        $psmOrder = array_merge([$psm], $fallbackPsms);
        $collected = [];

        for ($strip = 0; $strip < $rows; $strip++) {
            $top = max(0, (int) floor($strip * $stripHeight - ($stripHeight * $overlap)));
            $cropHeight = min($height - $top, $fullStripHeight);
            if ($cropHeight <= 1) {
                continue;
            }

            $cropped = imagecrop($image, [
                'x' => 0,
                'y' => $top,
                'width' => $width,
                'height' => $cropHeight,
            ]);
            if (! $cropped) {
                continue;
            }

            $stripPath = $tempDir.'/box-'.$index.'-line-'.$suffix.'-'.$strip.'.png';
            imagepng($cropped, $stripPath);
            imagedestroy($cropped);

            if (! File::exists($stripPath)) {
                continue;
            }

            // Numeric id used by the scale/preprocess helpers for their own temp files.
            $derivedIndex = ($index * 100) + $strip + $indexShift;
            $ocrPath = $stripPath;
            if ($scale > 1.01) {
                $ocrPath = $this->scaleOcrImage($ocrPath, $tempDir, $derivedIndex, $scale);
            }
            if ($linePreprocess) {
                $ocrPath = $this->preprocessImageFile($ocrPath, $tempDir, $derivedIndex, $lineMode, $lineThreshold, '0%');
            }

            // Primary mode first, then each fallback until one yields text.
            $recognised = '';
            foreach ($psmOrder as $mode) {
                $recognised = $this->ocrImageText($ocrPath, $lang, $mode, $oem, $tessdataDir);
                if ($recognised !== '') {
                    break;
                }
            }

            $recognised = trim(preg_replace('/\s+/', ' ', $recognised));
            if ($recognised !== '') {
                $collected[] = $recognised;
            }
        }

        return $collected;
    }

    /**
     * Writes an upscaled copy of a PNG for OCR.
     *
     * @param string $sourcePath PNG to enlarge.
     * @param string $tempDir    Directory that receives "box-{index}-scale.png".
     * @param int    $index      Number used in the output file name.
     * @param float  $scale      Enlargement factor; values <= 1.01 are ignored.
     *
     * @return string Path of the scaled image, or $sourcePath when scaling is
     *                skipped or the image cannot be read or written.
     */
    private function scaleOcrImage(string $sourcePath, string $tempDir, int $index, float $scale): string
    {
        if ($scale <= 1.01 || ! $this->canUseGd()) {
            return $sourcePath;
        }

        $original = @imagecreatefrompng($sourcePath);
        if (! $original) {
            return $sourcePath;
        }

        $srcWidth = imagesx($original);
        $srcHeight = imagesy($original);
        if ($srcWidth <= 0 || $srcHeight <= 0) {
            imagedestroy($original);

            return $sourcePath;
        }

        $dstWidth = (int) max(1, round($srcWidth * $scale));
        $dstHeight = (int) max(1, round($srcHeight * $scale));

        // Resample onto a white canvas of the target size.
        $enlarged = imagecreatetruecolor($dstWidth, $dstHeight);
        imagefill($enlarged, 0, 0, imagecolorallocate($enlarged, 255, 255, 255));
        imagecopyresampled($enlarged, $original, 0, 0, 0, 0, $dstWidth, $dstHeight, $srcWidth, $srcHeight);
        imagedestroy($original);

        $target = $tempDir.'/box-'.$index.'-scale.png';
        imagepng($enlarged, $target);
        imagedestroy($enlarged);

        if (File::exists($target)) {
            return $target;
        }

        return $sourcePath;
    }

    /**
     * Appends to $base every line of $extra that is not already present.
     *
     * Comparison is strict, and lines added earlier in the same call also count
     * as present, so duplicates inside $extra are added only once.
     *
     * @param array $base  Lines kept as-is, in order.
     * @param array $extra Candidate lines to append.
     *
     * @return array $base followed by the new lines.
     */
    private function mergeLineTexts(array $base, array $extra): array
    {
        $result = $base;

        foreach ($extra as $candidate) {
            if (in_array($candidate, $result, true)) {
                continue;
            }

            $result[] = $candidate;
        }

        return $result;
    }

    /**
     * Decides whether strip-wise OCR output is poor enough to retry.
     *
     * A retry is requested when the text is empty, has fewer than four lines,
     * or lacks either the father or the mother label.
     *
     * @param string $text Newline-joined OCR lines.
     */
    private function shouldRetryLineOcr(string $text): bool
    {
        $trimmed = trim($text);
        if ($trimmed === '') {
            return true;
        }

        if (count(preg_split('/\n+/', $trimmed)) < 4) {
            return true;
        }

        $fatherSeen = preg_match('/পিতা/u', $trimmed) === 1;
        $motherSeen = preg_match('/মাতা/u', $trimmed) === 1;

        return ! $fatherSeen || ! $motherSeen;
    }

    /**
     * Merges newly recognised lines into the text already gathered for a box.
     *
     * When the new text has four or more lines it becomes the base and the old
     * lines become the extras; otherwise the old text stays the base. An extra
     * is appended only when it is not already a substring of the base and it
     * starts with a label, looks like a name line, or contains a voter number.
     *
     * Lines are filtered with array_filter()'s default rule, so a line that is
     * exactly "0" is dropped together with empty lines.
     *
     * @param string $boxText  Text gathered so far.
     * @param string $lineText Newly recognised text.
     *
     * @return string Merged lines joined by "\n".
     */
    private function mergeOcrLines(string $boxText, string $lineText): string
    {
        $toLines = static fn (string $text): array => array_values(array_filter(preg_split('/\n+/', trim($text))));

        $current = $toLines($boxText);
        $incoming = $toLines($lineText);

        if ($incoming === []) {
            return implode("\n", $current);
        }

        if (count($incoming) >= 4) {
            $merged = $incoming;
            $candidates = $current;
        } else {
            $merged = $current;
            $candidates = $incoming;
        }
        $haystack = implode("\n", $merged);

        foreach ($candidates as $candidate) {
            $candidate = trim($candidate);
            if ($candidate === '' || mb_strpos($haystack, $candidate) !== false) {
                continue;
            }

            $worthKeeping = $this->lineStartsWithLabel($candidate)
                || $this->looksLikeNameLine($candidate)
                || $this->lineHasVoterNo($candidate);
            if (! $worthKeeping) {
                continue;
            }

            $merged[] = $candidate;
            $haystack .= "\n".$candidate;
        }

        return implode("\n", array_values(array_filter($merged)));
    }

    /**
     * OCRs a horizontal band cut from the middle of a box image.
     *
     * The band spans VM_OCR_BAND_TOP..VM_OCR_BAND_BOTTOM of the image height. It
     * is optionally upscaled (VM_OCR_BAND_SCALE, clamped to 1..4) and
     * preprocessed, then recognised with VM_OCR_BAND_PSM; the supplied fallback
     * modes are tried while no text comes back.
     *
     * @param string $sourcePath   PNG of the box.
     * @param string $tempDir      Directory for intermediate images.
     * @param int    $index        Box index used in file names.
     * @param string $lang         Tesseract language string.
     * @param string $oem          Tesseract OCR engine mode.
     * @param string $tessdataDir  Tesseract data directory.
     * @param array  $fallbackPsms Modes tried when the primary mode yields no text.
     *
     * @return string Trimmed text, or '' when GD is unavailable or the band cannot be produced.
     */
    private function ocrBoxBand(
        string $sourcePath,
        string $tempDir,
        int $index,
        string $lang,
        string $oem,
        string $tessdataDir,
        array $fallbackPsms
    ): string {
        if (! $this->canUseGd()) {
            return '';
        }

        $box = @imagecreatefrompng($sourcePath);
        if (! $box) {
            return '';
        }

        $boxWidth = imagesx($box);
        $boxHeight = imagesy($box);
        if ($boxWidth <= 0 || $boxHeight <= 0) {
            imagedestroy($box);

            return '';
        }

        // The band's bottom edge is kept at least 10% of the height below its top.
        $topShare = max(0.0, min(0.8, (float) $this->env('VM_OCR_BAND_TOP', 0.25)));
        $bottomShare = max($topShare + 0.1, min(1.0, (float) $this->env('VM_OCR_BAND_BOTTOM', 0.75)));
        $bandTop = (int) floor($boxHeight * $topShare);
        $bandHeight = (int) max(1, floor($boxHeight * $bottomShare) - $bandTop);

        $band = imagecrop($box, [
            'x' => 0,
            'y' => $bandTop,
            'width' => $boxWidth,
            'height' => $bandHeight,
        ]);
        imagedestroy($box);
        if (! $band) {
            return '';
        }

        $bandPath = $tempDir.'/box-'.$index.'-band.png';
        imagepng($band, $bandPath);
        imagedestroy($band);
        if (! File::exists($bandPath)) {
            return '';
        }

        $factor = max(1.0, min(4.0, (float) $this->env('VM_OCR_BAND_SCALE', 2.0)));
        $ocrPath = $bandPath;
        if ($factor > 1.01) {
            $ocrPath = $this->scaleOcrImage($bandPath, $tempDir, $index + 7000, $factor);
        }

        $preprocessBand = $this->envBool('VM_OCR_BAND_PREPROCESS', true);
        $threshold = (int) $this->env('VM_OCR_BAND_THRESHOLD', (int) $this->env('VM_OCR_PREPROCESS_THRESHOLD', 60));
        $mode = (string) $this->env('VM_OCR_BAND_PREPROCESS_MODE', (string) $this->env('VM_OCR_PREPROCESS_MODE', 'gd'));
        if ($preprocessBand) {
            $ocrPath = $this->preprocessImageFile($ocrPath, $tempDir, $index + 7100, $mode, $threshold, '0%');
        }

        // Primary mode first, then each fallback until one yields text.
        $psmOrder = array_merge([(string) $this->env('VM_OCR_BAND_PSM', '7')], $fallbackPsms);
        $recognised = '';
        foreach ($psmOrder as $psm) {
            $recognised = $this->ocrImageText($ocrPath, $lang, $psm, $oem, $tessdataDir);
            if ($recognised !== '') {
                break;
            }
        }

        return trim($recognised);
    }

    /**
     * Produces a binarised copy of an image for OCR, using whichever backend works.
     *
     * Order of attempts: ImageMagick first when $mode is "magick"; then GD (if
     * available); then ImageMagick as a last resort for other modes.
     *
     * @param string $sourcePath Image to preprocess.
     * @param string $tempDir    Directory that receives "box-{index}-pre.png".
     * @param int    $index      Number used in the output file name.
     * @param string $mode       Preferred backend ("magick" selects ImageMagick first).
     * @param int    $threshold  Binarisation threshold passed to the backend.
     * @param string $deskew     Deskew argument passed to ImageMagick.
     *
     * @return string Path of the processed image, or $sourcePath when every attempt fails.
     */
    private function preprocessImageFile(string $sourcePath, string $tempDir, int $index, string $mode, int $threshold, string $deskew): string
    {
        $target = $tempDir.'/box-'.$index.'-pre.png';
        $contrast = (int) $this->env('VM_OCR_PREPROCESS_CONTRAST', -20);
        $magickPreferred = $mode === 'magick';

        if ($magickPreferred && $this->preprocessWithMagick($sourcePath, $target, $threshold, $deskew)) {
            return $target;
        }

        if ($this->canUseGd() && $this->preprocessWithGd($sourcePath, $target, $threshold, $contrast)) {
            return $target;
        }

        if (! $magickPreferred && $this->preprocessWithMagick($sourcePath, $target, $threshold, $deskew)) {
            return $target;
        }

        return $sourcePath;
    }

    /**
     * Binarises a PNG with GD: grayscale, contrast adjustment, then a hard threshold.
     *
     * Palette-based images (which GD uses for grayscale, bilevel and indexed
     * PNGs) are converted to truecolor first. Without that, imagecolorat()
     * returns a palette index rather than an RGB value and the channel
     * extraction below would read every pixel as black.
     *
     * @param string $sourcePath PNG to read.
     * @param string $destPath   Where the black-and-white PNG is written.
     * @param int    $threshold  Brightness cut-off in percent (clamped to 0..100);
     *                           pixels brighter than it become white.
     * @param int    $contrast   Value for IMG_FILTER_CONTRAST (clamped to -100..100).
     *
     * @return bool True when the output file was written.
     */
    private function preprocessWithGd(string $sourcePath, string $destPath, int $threshold, int $contrast): bool
    {
        if (! $this->canUseGd()) {
            return false;
        }

        $image = @imagecreatefrompng($sourcePath);
        if (! $image) {
            return false;
        }

        // Guarantee that imagecolorat() yields packed RGB values.
        if (! imageistruecolor($image) && ! imagepalettetotruecolor($image)) {
            imagedestroy($image);

            return false;
        }

        imagefilter($image, IMG_FILTER_GRAYSCALE);
        $contrast = max(-100, min(100, $contrast));
        imagefilter($image, IMG_FILTER_CONTRAST, $contrast);

        $threshold = max(0, min(100, $threshold));
        $limit = (int) round(255 * ($threshold / 100));
        $white = imagecolorallocate($image, 255, 255, 255);
        $black = imagecolorallocate($image, 0, 0, 0);
        $width = imagesx($image);
        $height = imagesy($image);

        for ($y = 0; $y < $height; $y++) {
            for ($x = 0; $x < $width; $x++) {
                $rgb = imagecolorat($image, $x, $y);
                $r = ($rgb >> 16) & 0xFF;
                $g = ($rgb >> 8) & 0xFF;
                $b = $rgb & 0xFF;
                $gray = (int) round(($r + $g + $b) / 3);
                imagesetpixel($image, $x, $y, $gray > $limit ? $white : $black);
            }
        }

        // Require a successful write so a stale file from an earlier run is not
        // mistaken for fresh output.
        $written = imagepng($image, $destPath);
        imagedestroy($image);

        return $written && File::exists($destPath);
    }

    /**
     * Binarises an image by running ImageMagick as an external process.
     *
     * The command converts to gray, optionally deskews, and applies a percentage
     * threshold. The process is given 60 seconds.
     *
     * @param string $sourcePath Image to read.
     * @param string $destPath   Where the result is written.
     * @param int    $threshold  Threshold in percent (clamped to 0..100).
     * @param string $deskew     Value for "-deskew"; an empty string omits the option.
     *
     * @return bool True when the command succeeded and the output file exists.
     */
    private function preprocessWithMagick(string $sourcePath, string $destPath, int $threshold, string $deskew): bool
    {
        $binary = $this->resolveImagemagickPath();
        if (! $binary) {
            return false;
        }

        $percent = max(0, min(100, $threshold));
        $deskewOptions = $deskew !== '' ? ['-deskew', $deskew] : [];

        $command = array_merge(
            [$binary, $sourcePath, '-colorspace', 'Gray'],
            $deskewOptions,
            ['-threshold', $percent.'%', $destPath]
        );

        $magick = new Process($command);
        $magick->setTimeout(60);
        $magick->run();

        if (! $magick->isSuccessful()) {
            return false;
        }

        return File::exists($destPath);
    }

    /**
     * Resolves the ImageMagick executable from VM_IMAGEMAGICK_PATH (default "magick").
     *
     * @return string|null The resolved path or command, or null when the setting is blank.
     */
    private function resolveImagemagickPath(): ?string
    {
        $binary = $this->resolveBinary($this->env('VM_IMAGEMAGICK_PATH', 'magick'));

        return $binary === '' ? null : $binary;
    }

    /**
     * Turns a configured executable (or directory) setting into a usable path.
     *
     * Absolute paths (drive letter, UNC prefix, or leading "/") are returned
     * unchanged. Otherwise the value is returned relative to the application
     * base path when that file exists, else unchanged.
     *
     * @param string|null $path Configured value.
     *
     * @return string Resolved path or command name; '' when the input is blank.
     */
    private function resolveBinary(?string $path): string
    {
        $configured = trim((string) $path);
        if ($configured === '') {
            return '';
        }

        $isAbsolute = preg_match('/^[A-Za-z]:\\\\|^\\\\\\\\|^\//', $configured);
        if ($isAbsolute) {
            return $configured;
        }

        $projectRelative = base_path($configured);

        return File::exists($projectRelative) ? $projectRelative : $configured;
    }

    /**
     * Renders one rectangular region of a PDF page to PNG with pdftoppm.
     *
     * Runs from the application base path with a 600-second timeout;
     * mustRun() raises an exception when the command fails.
     *
     * @param string $pdftoppm Path or name of the pdftoppm executable.
     * @param string $filePath PDF file to render.
     * @param int    $page     Page number, used as both first and last page.
     * @param int    $dpi      Rendering resolution.
     * @param array  $box      Crop rectangle with keys x, y, width, height.
     * @param string $prefix   Output file prefix passed to pdftoppm.
     */
    private function renderPdftoppmCrop(string $pdftoppm, string $filePath, int $page, int $dpi, array $box, string $prefix): void
    {
        $pageNumber = (string) $page;

        $command = [
            $pdftoppm,
            '-r', (string) $dpi,
            '-f', $pageNumber,
            '-l', $pageNumber,
            '-x', (string) $box['x'],
            '-y', (string) $box['y'],
            '-W', (string) $box['width'],
            '-H', (string) $box['height'],
            '-png',
            $filePath,
            $prefix,
        ];

        $render = new Process($command);
        $render->setWorkingDirectory(base_path());
        $render->setTimeout(600);
        $render->mustRun();
    }

    /**
     * Runs Tesseract on an image and returns the sanitised text it prints.
     *
     * The executable comes from VM_TESSERACT_PATH. When $tessdataDir is blank or
     * missing, "tools/tessdata" under the base path is used instead. The process
     * runs from the base path with a 120-second timeout; mustRun() raises an
     * exception when the command fails.
     *
     * @param string $imagePath   Image to recognise.
     * @param string $ocrLang     Tesseract language string.
     * @param string $ocrPsm      Page-segmentation mode.
     * @param string $ocrOem      OCR engine mode.
     * @param string $tessdataDir Tesseract data directory.
     *
     * @return string OCR output with control characters removed.
     */
    private function ocrImageText(string $imagePath, string $ocrLang, string $ocrPsm, string $ocrOem, string $tessdataDir): string
    {
        $binary = $this->resolveBinary($this->env('VM_TESSERACT_PATH', 'tesseract'));

        $dataDir = $tessdataDir;
        if (! $dataDir || ! File::exists($dataDir)) {
            $dataDir = base_path('tools/tessdata');
        }

        $command = [
            $binary,
            $imagePath,
            'stdout',
            '-l', $ocrLang,
            '--psm', $ocrPsm,
            '--oem', $ocrOem,
            '--tessdata-dir', $dataDir,
            '-c', 'preserve_interword_spaces=1',
        ];

        $tesseract = new Process($command);
        // Current environment plus TESSDATA_PREFIX pointing at the data directory.
        $tesseract->setEnv(array_merge($_ENV, $_SERVER, ['TESSDATA_PREFIX' => $dataDir]));
        $tesseract->setWorkingDirectory(base_path());
        $tesseract->setTimeout(120);
        $tesseract->mustRun();

        return $this->sanitizeOcrOutput($tesseract->getOutput());
    }

    /**
     * Strip NUL bytes and other low control characters from OCR output.
     *
     * Tab (0x09), line feed (0x0A) and carriage return (0x0D) are kept. If the
     * regex replacement fails, the NUL-stripped text is returned as is.
     */
    private function sanitizeOcrOutput(string $text): string
    {
        $withoutNul = str_replace("\0", '', $text);
        $stripped = preg_replace('/[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F]/', '', $withoutNul);

        if ($stripped === null) {
            return $withoutNul;
        }

        return $stripped;
    }

    /**
     * OCR a whole PDF: rasterise it to PNG images with pdftoppm, run Tesseract
     * on each image (sorted by file name) and return the concatenated text,
     * one trailing newline per image.
     *
     * Configuration is read from VM_PDFTOPPM_PATH, VM_TESSDATA_DIR,
     * VM_OCR_LANG, VM_OCR_PSM, VM_OCR_OEM and VM_OCR_DPI. The Tesseract binary
     * itself is resolved inside ocrImageText().
     *
     * The temporary image directory is removed in a finally block so that it
     * does not accumulate under storage/app/ocr when a subprocess fails.
     *
     * @param string $filePath PDF to OCR.
     *
     * @throws \RuntimeException When pdftoppm produced no page images.
     *                           Process failures from mustRun() also propagate.
     */
    private function ocrText(string $filePath): string
    {
        $pdftoppm = $this->resolveBinary($this->env('VM_PDFTOPPM_PATH', 'pdftoppm'));
        $tessdataDir = $this->resolveBinary($this->env('VM_TESSDATA_DIR', base_path('tools/tessdata')));
        if (! $tessdataDir || ! File::exists($tessdataDir)) {
            $tessdataDir = base_path('tools/tessdata');
        }
        $ocrLang = (string) $this->env('VM_OCR_LANG', 'ben+eng');
        $ocrPsm = (string) $this->env('VM_OCR_PSM', '6');
        $ocrOem = (string) $this->env('VM_OCR_OEM', '1');
        $ocrDpi = (int) $this->env('VM_OCR_DPI', 300);

        $tempDir = storage_path('app/ocr/'.Str::random(8));
        File::makeDirectory($tempDir, 0775, true, true);
        $imagePrefix = $tempDir.'/page';

        try {
            $convert = new Process([$pdftoppm, '-r', (string) $ocrDpi, '-png', $filePath, $imagePrefix]);
            $convert->setWorkingDirectory(base_path());
            $convert->setTimeout(600);
            $convert->mustRun();

            // glob() may return false on error; treat that like "no images".
            $images = collect(File::glob($imagePrefix.'-*.png') ?: [])->sort()->values();
            if ($images->isEmpty()) {
                throw new \RuntimeException('OCR image generation ব্যর্থ হয়েছে।');
            }

            $output = '';
            foreach ($images as $img) {
                // Shared per-image path: same Tesseract invocation and the same
                // control-character sanitising as single-image OCR.
                $output .= $this->ocrImageText($img, $ocrLang, $ocrPsm, $ocrOem, $tessdataDir)."\n";
            }

            return $output;
        } finally {
            File::deleteDirectory($tempDir);
        }
    }

    /**
     * Extract the text layer of a single PDF page with pdftotext, caching the result.
     *
     * The cache file lives under storage/app/ocr and is keyed by the file path,
     * its modification time and size, the page number and the pdftotext binary.
     *
     * This is a best-effort helper: it returns an empty string when no
     * pdftotext binary is configured, when the command does not succeed or
     * when it produced no output file. Failed runs are never cached, so a
     * later call can retry.
     *
     * @param string $filePath PDF to read.
     * @param int    $page     Page number passed to both -f and -l.
     */
    private function pdfToTextPage(string $filePath, int $page): string
    {
        $pdftotext = $this->resolvePdftotextPath();
        if (! $pdftotext) {
            return '';
        }

        $cacheKey = sha1($filePath.'|'.filemtime($filePath).'|'.filesize($filePath).'|page:'.$page.'|'.$pdftotext);
        $cached = storage_path('app/ocr/pdftotext-page-'.$cacheKey.'.txt');
        if (File::exists($cached)) {
            return File::get($cached);
        }

        File::ensureDirectoryExists(storage_path('app/ocr'));
        $tempTxt = storage_path('app/ocr/'.Str::random(8).'.txt');
        $process = new Process([
            $pdftotext,
            '-enc',
            'UTF-8',
            '-raw',
            '-f',
            (string) $page,
            '-l',
            (string) $page,
            $filePath,
            $tempTxt,
        ]);
        $process->setTimeout(180);

        try {
            $process->run();

            // Do not trust (or cache) output left behind by a failed run.
            if (! $process->isSuccessful() || ! File::exists($tempTxt)) {
                return '';
            }

            $text = File::get($tempTxt);
        } finally {
            // Also covers a timeout exception thrown by run().
            File::delete($tempTxt);
        }

        File::put($cached, $text);

        return $text;
    }

    /**
     * Extract the text layer of an entire PDF with pdftotext.
     *
     * The output is written to a temporary file under storage/app/ocr, read
     * back and removed again. The directory is created when missing, and the
     * temporary file is deleted even when the command fails.
     *
     * @param string $filePath PDF to read.
     *
     * @throws \RuntimeException When no pdftotext binary is available or no
     *                           output file was produced. Process failures from
     *                           mustRun() also propagate.
     */
    private function pdfToText(string $filePath): string
    {
        $pdftotext = $this->resolvePdftotextPath();
        if (! $pdftotext) {
            throw new \RuntimeException('pdftotext পাওয়া যায়নি।');
        }

        // Same precondition pdfToTextPage() establishes before writing here.
        File::ensureDirectoryExists(storage_path('app/ocr'));
        $tempTxt = storage_path('app/ocr/'.Str::random(8).'.txt');
        $process = new Process([$pdftotext, '-enc', 'UTF-8', '-raw', $filePath, $tempTxt]);
        $process->setTimeout(180);

        try {
            $process->mustRun();

            if (! File::exists($tempTxt)) {
                throw new \RuntimeException('PDF to text conversion failed.');
            }

            return File::get($tempTxt);
        } finally {
            File::delete($tempTxt);
        }
    }

    /**
     * Locate the pdftotext binary.
     *
     * Resolution order:
     *  1. VM_PDFTOTEXT_PATH (default "pdftotext") when that path exists.
     *  2. A sibling of VM_PDFTOPPM_PATH, obtained by swapping the trailing
     *     "pdftoppm" / "pdftoppm.exe" file name for "pdftotext" / "pdftotext.exe",
     *     when that file exists.
     *  3. The configured VM_PDFTOTEXT_PATH value as is, or null when it is empty.
     *
     * @return string|null Binary path or name to execute, or null when none is configured.
     */
    private function resolvePdftotextPath(): ?string
    {
        $pdftotext = $this->env('VM_PDFTOTEXT_PATH', 'pdftotext');
        if ($pdftotext && File::exists($pdftotext)) {
            return $pdftotext;
        }

        $pdftoppm = $this->env('VM_PDFTOPPM_PATH');
        if ($pdftoppm) {
            $swapped = 0;
            $derived = preg_replace('/pdftoppm(\.exe)?$/i', 'pdftotext$1', (string) $pdftoppm, 1, $swapped);

            // Only accept the candidate when the name was actually rewritten;
            // otherwise the pdftoppm binary itself would be returned as pdftotext.
            if (is_string($derived) && $swapped === 1 && File::exists($derived)) {
                return $derived;
            }
        }

        return $pdftotext ?: null;
    }

    /**
     * Return one capture group of the first regex match in $text.
     *
     * @param string $pattern PCRE pattern including delimiters.
     * @param string $text    Subject to search.
     * @param int    $index   Capture group to return (0 is the whole match).
     *
     * @return string|null The captured text, or null when the pattern does not
     *                     match or the group is absent from the match.
     */
    private function extract(string $pattern, string $text, int $index = 1): ?string
    {
        $groups = [];
        $matched = preg_match($pattern, $text, $groups);

        if (! $matched) {
            return null;
        }

        return $groups[$index] ?? null;
    }

    /**
     * Replace Bengali digits (০-৯) with their ASCII counterparts (0-9).
     *
     * All other characters are left untouched.
     */
    private function bnToEnDigits(string $value): string
    {
        // Position in this list is the numeric value of the glyph.
        $bengaliDigits = ['০', '১', '২', '৩', '৪', '৫', '৬', '৭', '৮', '৯'];

        $translation = [];
        foreach ($bengaliDigits as $digit => $glyph) {
            $translation[$glyph] = (string) $digit;
        }

        return strtr($value, $translation);
    }

    /**
     * Replace ASCII digits (0-9) with the corresponding Bengali digits (০-৯).
     *
     * All other characters are left untouched.
     */
    private function normalizeBnDigits(string $value): string
    {
        // A plain list is enough here: the list index (0-9) is the digit being
        // replaced and the element is its Bengali glyph.
        $glyphByDigit = ['০', '১', '২', '৩', '৪', '৫', '৬', '৭', '৮', '৯'];

        return strtr($value, $glyphByDigit);
    }

    /**
     * Remove every character that is not an ASCII digit.
     *
     * The pattern is byte-based (no "u" modifier), so non-ASCII digits such as
     * Bengali numerals are removed as well.
     *
     * preg_replace() returns null on a PCRE error, which would violate the
     * declared string return type; an empty string is returned in that case.
     */
    private function numbersOnly(string $value): string
    {
        return preg_replace('/\D+/', '', $value) ?? '';
    }

    /**
     * Parse a date written with Bengali or ASCII digits into "Y-m-d".
     *
     * Dots are treated as slashes and spaces are removed before the value is
     * tried against d/m/Y, d-m-Y and Y-m-d in that order.
     *
     * Parsing is strict: a value that only parses with warnings (for example
     * 31/02/1990, which the date parser would otherwise roll over to
     * 1990-03-03) is rejected rather than silently stored as a different date.
     *
     * @param string|null $dateString Raw date text.
     *
     * @return string|null The date as "Y-m-d", or null when the input is empty
     *                     or matches none of the formats as a real calendar date.
     */
    private function parseDate(?string $dateString): ?string
    {
        if (! $dateString) {
            return null;
        }

        $converted = $this->bnToEnDigits(trim($dateString));
        $converted = str_replace(['.', ' '], ['/', ''], $converted);

        foreach (['d/m/Y', 'd-m-Y', 'Y-m-d'] as $format) {
            try {
                // "!" resets unspecified fields instead of taking them from the current time.
                $parsed = \DateTimeImmutable::createFromFormat('!'.$format, $converted);
            } catch (\ValueError $e) {
                // Raised for input the parser refuses outright (e.g. NUL bytes).
                continue;
            }

            // Returns false on newer PHP versions when there were no errors or warnings.
            $issues = \DateTimeImmutable::getLastErrors();
            $hasIssues = is_array($issues)
                && ($issues['warning_count'] > 0 || $issues['error_count'] > 0);

            if ($parsed === false || $hasIssues) {
                continue;
            }

            return $parsed->format('Y-m-d');
        }

        return null;
    }

    /**
     * Decide the gender to assign to voters imported from a source file.
     *
     * An explicit override of "male", "female" or "unknown" wins. Otherwise the
     * gender_type of the file's area is used: "male" and "female" map to
     * themselves, and anything else (including a missing area or value) maps
     * to "unknown".
     *
     * @param SourceFile  $sourceFile     File whose area supplies the default.
     * @param string|null $genderOverride Optional explicit gender.
     */
    private function mapGenderFromArea(SourceFile $sourceFile, ?string $genderOverride = null): string
    {
        $acceptedOverrides = ['male', 'female', 'unknown'];
        if (in_array($genderOverride, $acceptedOverrides, true)) {
            return $genderOverride;
        }

        $areaGender = $sourceFile->area->gender_type ?? 'mixed';

        if ($areaGender === 'male' || $areaGender === 'female') {
            return $areaGender;
        }

        return 'unknown';
    }
}






