#!/usr/bin/env perl
#
#   Copyright (c) MediaTek USA Inc., 2024
#
#   This program is free software;  you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or (at
#   your option) any later version.
#
#   This program is distributed in the hope that it will be useful, but
#   WITHOUT ANY WARRANTY;  without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#   General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program;  if not, see
#   <http://www.gnu.org/licenses/>.
#
#
# This script traverses C/C++ coverage data in JSON format, generated by
#   'llvm-cov export -format=text ....'
# The coverage data is read from one or more JSON data files (generated by
# llvm-cov) and translated into LCOV .info format.
#
#   $ clang[++] -o myExe -fprofile-instr-generate -fcoverage-mapping \
#       [-fcoverage-mcdc] ....
#   $ ./myExe ...
#   $ llvm-profdata merge -o myExe.profdata --sparse *.profraw
#   $ llvm-cov export -format=text -instr-profile=myExe.profdata \
#       ./myExe > myExe.json
#   $ llvm2lcov [--output myExe.info] [--test-name name] [options] myExe.json
#
# In order to generate MC/DC data, note that you must:
#   - use LLVM/18 or newer
#   - enable MC/DC instrumentation in your compile/link steps, and
#   - pass the '--mcdc-coverage' flag to llvm2lcov
#
# You can also use LLVM/21 or newer to generate MC/DC data more cleanly.
#
# See 'llvm2lcov --help' for more usage information
#
# See the LLVM documentation for more information on flags and compilation options.

use strict;
use version;
use warnings;
require Exporter;

use File::Basename qw(basename dirname fileparse);
use File::Spec::Functions qw /abs2rel catdir file_name_is_absolute splitdir
                              splitpath catpath catfile/;
use File::Temp;
use File::Copy qw(copy move);
use File::Path;
use Cwd qw/abs_path getcwd realpath/;
use Time::HiRes;    # for profiling
use Capture::Tiny;
use Storable;
use POSIX;

use lib "/usr/lib/lcov";
use lcovutil;

# Print the tool's usage/help text to a file handle.
#
# Arguments:
#   HANDLE - glob (or glob reference) of the handle to write to,
#            e.g. *STDOUT or *STDERR.
#
# The text interpolates $lcovutil::tool_name, so that variable must be set
# before this is called.
sub print_usage(*)
{
    local *HANDLE = shift;
    print(HANDLE <<EOF);
Usage: $lcovutil::tool_name [OPTIONS] json_file [json_file ...]
Translate LLVM\'s 'llvm-cov' JSON coverage data file to LCOV .info
file format.

In addition to common options supported by other tools in the LCOV
suite (e.g., --comment, --version-script, --ignore-error, --substitute,
--exclude, etc.), the tool options are:

  --output filename:
      The lcov data will be written to the specified file - or to
      the file called 'llvm2lcov.info' in the current run directory
      if this option is not used.

  --test-name name:
      Coverage info will be associated with the testcase name provided.
      It is not necessary to provide a name.

  --branch-coverage:
      Include branch coverage data in the output.

  --mcdc-coverage:
      Include MC/DC data in the output.
      Note that you must be using LLVM/18 or higher, and must have instrumented
      your code for MC/DC collection.  See the LLVM documentation for details.

See LLVM documentation for directions on how to generate coverage data
in JSON format.

For example:

    # compile your example, instrumenting for MC/DC coverage
  \$ clang++ -o myExe -fprofile-instr-generate -fcoverage-mapping \\
       -fcoverage-mcdc myCode.cpp
    # run your testcases
  \$ ./myExe ...
    # convert profile data
  \$ llvm-profdata merge -o myExe.profdata --sparse *.profraw
    # export coverage data in JSON format
  \$ llvm-cov export -format=text -instr-profile=myExe.profdata \\
       ./myExe > myExe.json
    # use this script to convert to LCOV format
  \$ $lcovutil::tool_name --output myExe.info --test-name myTestcase \\
       --mcdc-coverage --branch-coverage myExe.json
    # and generate a genhtml-format coverage report:
  \$ genhtml -o html_report myExe.info ...

EOF
}

# Read one or more 'llvm-cov export' JSON files and build LCOV coverage data.
#
# Arguments:
#   $testname - test case name to associate with the data; undef is treated
#               as the empty string.
#   @_        - remaining arguments are the JSON file paths; dies if none
#               is given.
#
# Returns:
#   the TraceFile object holding line data and (depending on the
#   $lcovutil::func_coverage / br_coverage / mcdc_coverage settings)
#   function, branch and MC/DC data for every file that was not excluded.
#
# Dies if a JSON file does not exist, does not carry a 'data' array, or
# contains segment/branch/MC/DC tuples of unexpected length.
sub parse
{
    my $testname = shift;

    die('JSON file argument required') unless @_;

    my $top = TraceFile->new();

    $testname = '' unless defined($testname);

    my $srcReader = ReadCurrentSource->new();

    # The version "3.0.1" adds fileId to mcdc.
    # Older exports carry MC/DC records per file; from this version on they
    # are taken from the per-function data instead.
    my $mcdcFileIdVersion = version->parse("3.0.1");

    foreach my $jsonFile (@_) {
        die("no such JSON file '$jsonFile'") unless -e $jsonFile;
        my $json = JsonSupport::load($jsonFile);
        die("unrecognized JSON file format in $jsonFile")
            unless (defined($json) &&
                    exists($json->{data}) &&
                    'ARRAY' eq ref($json->{data}));
        my $json_version = version->parse($json->{version});

        lcovutil::info("read $jsonFile\n");

        foreach my $k (@{$json->{data}}) {
            #lcovutil::info("starting data entry..\n");

            # ---- per-file data: line counts (and pre-3.0.1 MC/DC) ----
            foreach my $f (@{$k->{files}}) {
                lcovutil::info('parsing ' . $f->{filename} . " ..\n");
                my $filename =
                    ReadCurrentSource::resolve_path($f->{filename}, 1);
                if (TraceFile::skipCurrentFile($filename)) {
                    # report each excluded file only once
                    if (!exists($lcovutil::excluded_files{$filename})) {
                        $lcovutil::excluded_files{$filename} = 1;
                        lcovutil::info("Excluding $filename\n");
                    }
                    next;
                }
                $srcReader->open($filename);

                my $fileInfo = $top->data($filename);

                my $version = lcovutil::extractFileVersion($filename);
                $fileInfo->version($version)
                    if (defined($version) && $version ne "");

                my $lineData = $fileInfo->test($testname);

                # the 'summary' and 'expansions' entries of the file record
                # are not used here
                my $branches = $f->{branches};
                my $segments = $f->{segments};
                # explicit ternary: 'my $x = ... if COND' is undefined
                # behaviour according to perlsyn
                my $mcdc =
                    ($lcovutil::mcdc_coverage && exists($f->{mcdc_records})) ?
                    $f->{mcdc_records} :
                    undef;

                my $index       = 0;
                my $currentLine = 0;

                # Walk the segment list pairwise (current segment and its
                # successor) and emit one line count per source line.
                while ($index < $#$segments) {
                    my $segment = $segments->[$index];
                    die("unexpected segment data")
                        unless scalar(@$segment) == 6;
                    my ($line, $col, $count, $hasCount, $isRegionEntry, $isGap)
                        = @$segment;
                    $currentLine = $line if !$currentLine;
                    if ($hasCount) {
                        $segment = $segments->[$index + 1];
                        die("unexpected segment data")
                            unless scalar(@$segment) == 6;
                        my ($next_line, $next_col, $next_count, $next_hasCount,
                            $next_isRegionEntry, $next_isGap)
                            = @$segment;
                        if ($currentLine == $next_line && !$next_isRegionEntry)
                        {
                            # Several segments on the current line: consume
                            # them all and record a single count for the
                            # line.
                            while ($next_line == $currentLine &&
                                   ++$index < $#$segments) {
                                $segment = $segments->[$index + 1];
                                die("unexpected segment data")
                                    unless scalar(@$segment) == 6;
                                $next_line = $segment->[0];
                                # keep the larger count, but only when the
                                # line was already hit and the following
                                # segment is still on this line
                                $count     = $next_count
                                    if ($count &&
                                        $next_count > $count &&
                                        $currentLine == $next_line);
                                $next_count = $segment->[2];
                            }
                            $lineData->append($currentLine, $count);
                            ++$currentLine;
                        } else {
                            # The segment's count applies to the lines from
                            # $currentLine up to $bound.
                            my $bound = $next_line;
                            my $i     = $index;
                            # look ahead (without consuming) for a region
                            # entry on the bounding line
                            while (!$next_isRegionEntry &&
                                   $next_line == $bound &&
                                   ++$i < $#$segments) {
                                $segment = $segments->[$i + 1];
                                die("unexpected segment data")
                                    unless scalar(@$segment) == 6;
                                $next_line          = $segment->[0];
                                $next_isRegionEntry = $segment->[4];
                            }
                            # a region entered on the bounding line owns
                            # that line - unless the current segment is
                            # itself a region entry on the same line
                            --$bound
                                if ($next_isRegionEntry &&
                                    $next_line == $bound &&
                                    !($isRegionEntry && $line == $next_line));
                            $count = $next_count
                                if $next_count > $count && $line == $next_line;
                            while ($currentLine <= $bound) {
                                $lineData->append($currentLine, $count);
                                ++$currentLine;
                            }
                            ++$index;
                        }
                    } else {
                        # Segment without a count: skip ahead to the next
                        # segment that has one (or to the last segment).
                        do {
                            ++$index;
                            $segment = $segments->[$index];
                            die("unexpected segment data")
                                unless scalar(@$segment) == 6;
                            ($line, $col, $count, $hasCount,
                             $isRegionEntry, $isGap) = @$segment;
                        } while (!$hasCount && $index < $#$segments);
                        $currentLine = $isRegionEntry ? $line : $line + 1;
                    }
                }
                if ($mcdc && $json_version < $mcdcFileIdVersion) {
                    my $mcdcData = $fileInfo->testcase_mcdc($testname);
                    my @mcdcBranches;       # array (start line, start column, expression)
                    foreach my $branch (@$branches) {
                        die("unexpected branch data")
                            unless scalar(@$branch) == 9;
                        # Consider only branches of "MCDCBranchRegion" kind.
                        next if ($branch->[-1] != 6);
                        my ($line, $startCol, $endline,
                            $endcol, $trueCount, $falseCount,
                            $fileId, $expandedId, $kind) = @$branch;
                        # source text is only available if the reader has
                        # content for this file
                        my $expr =
                            $srcReader->notEmpty() ?
                            $srcReader->getExpr($line, $startCol, $endline,
                                                $endcol) :
                            undef;
                        push(@mcdcBranches, [$line, $startCol, $expr]);
                    }
                    foreach my $m (@$mcdc) {
                        die("unexpected MC/DC data") unless scalar(@$m) == 7;
                        my ($line, $startCol, $endLine, $endCol, $expandedId,
                            $kind, $cov)
                            = @$m;
                        die("unexpected MC/DC cov")
                            unless 'ARRAY' eq ref($cov);

                        # read the source line and extract the expression...
                        my $expr =
                            $srcReader->notEmpty() ?
                            $srcReader->getExpr($line, $startCol, $endLine,
                                                $endCol) :
                            undef;
                        # collect the MC/DC branches which start inside the
                        # source range of this decision
                        my @brExprs;
                        foreach my $branch (@mcdcBranches) {
                            my ($brLine, $brCol, $brExpr) = @$branch;
                            if (($brLine > $line ||
                                 ($brLine == $line && $brCol >= $startCol))
                                &&
                                ($brLine < $endLine ||
                                 ($brLine == $endLine && $brCol <= $endCol))
                            ) {
                                push(@brExprs, [$brLine, $brCol, $brExpr]);
                            }
                        }
                        # order by source position (line, then column)
                        @brExprs =
                            sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] }
                            @brExprs;
                        my $current_mcdc =
                            $mcdcData->new_mcdc($mcdcData, $line);
                        my $groupSize = scalar(@$cov);
                        my $idx       = 0;
                        foreach my $c (@$cov) {
                            # branch expressions are used only when there
                            # is exactly one per condition
                            my $branchExpr =
                                $groupSize == scalar(@brExprs) ?
                                $brExprs[$idx]->[2] :
                                undef;
                            # fall back to the condition index if no source
                            # text is available
                            my $fullExpr =
                                defined($branchExpr) &&
                                defined($expr) ? "'$branchExpr' in '$expr'" :
                                $idx;
                            $current_mcdc->insertExpr($filename, $groupSize, 0,
                                                      $c, $idx, $fullExpr);
                            $current_mcdc->insertExpr($filename, $groupSize, 1,
                                                      $c, $idx, $fullExpr);
                            ++$idx;
                        }
                        $mcdcData->close_mcdcBlock($current_mcdc);
                    }
                }
                lcovutil::info(2, "finished parsing $filename\n");
            }

            # ---- per-function data: functions, branches, 3.0.1+ MC/DC ----
            foreach my $f (@{$k->{functions}}) {
                my $name      = $f->{name};
                my $filenames = $f->{filenames};    # array
                my $filename =
                    ReadCurrentSource::resolve_path($filenames->[0], 1);
                next if (TraceFile::skipCurrentFile($filename));
                die('unexpected unknown file \'' . $filenames->[0] . '\'')
                    unless $top->file_exists($filename);
                $srcReader->open($filename);

                my $info     = $top->data($filename);
                my $count    = $f->{count};
                my $regions  = $f->{regions};    # startline/col, endline/col/
                my $branches = $f->{branches};
                # The version "3.0.1" adds fileId to mcdc.
                # This allows using MC/DC branches from expansions for placing MC/DC entries defined in expansions to expansions call sites.
                my $mcdc =
                    ($lcovutil::mcdc_coverage &&
                     $json_version >= $mcdcFileIdVersion &&
                     exists($f->{mcdc_records})) ?
                    $f->{mcdc_records} :
                    undef;

                my $functionMap = $info->testfnc($testname);
                # use branch data to derive MC/DC expression - so need
                # it, even if user didn't ask
                my $branchData =
                    ($lcovutil::br_coverage || $mcdc) ?
                    $info->testbr($testname) :
                    undef;
                my $mcdcData =
                    ($json_version >= $mcdcFileIdVersion &&
                     $lcovutil::mcdc_coverage) ?
                    $info->testcase_mcdc($testname) :
                    undef;
                # Function extent is taken from the first region (index 0);
                # presumably that region spans the whole function - TODO
                # confirm against the llvm-cov export format.
                my $startLine = $regions->[0]->[0];  # startline of first region
                my $endline   = $regions->[0]->[2];  # endline of first region
                if ($lcovutil::func_coverage) {
                    # define the function only the first time it is seen
                    $functionMap->define_function($name, $startLine, $endline)
                        unless defined($functionMap->findName($name));
                    $functionMap->add_count($name, $count);
                }

                my @mcdcBranches;           # array (fileId, start line, start column, expression)
                my %expanded_mcdcBranches;  # hash of branch's fileId -> branch's start line

                if ($branchData) {
                    my $funcBranchData = BranchData->new();
                    my $regionIdx      = 0;
                    foreach my $branch (@$branches) {
                        die("unexpected branch data")
                            unless scalar(@$branch) == 9;
                        my ($brStartLine, $brStartCol, $endLine,
                            $endCol, $trueCount, $falseCount,
                            $fileId, $expandedId, $kind) = @$branch;
                        my ($line, $col) = ($brStartLine, $brStartCol);
                        my $expr;

                        if ($fileId == 0) {
                            # fileId 0: the branch location is used as is
                            # and its text is read from the opened source
                            $expr =
                                $srcReader->getExpr($line, $col, $endLine,
                                                    $endCol)
                                if $srcReader->notEmpty();
                        } else {
                            # Find a source range, which contains the branch.
                            # NOTE(review): region kind 1 is presumably
                            # LLVM's "ExpansionRegion" - confirm.
                            while ($regionIdx < scalar(@$regions)) {
                                my ($rStartLine, $rStartCol, $rEndLine,
                                    $rEndCol, $rCount, $rFileId,
                                    $rExpandedId, $rKind
                                ) = @{$regions->[$regionIdx]};
                                if ($rExpandedId == $fileId && $rKind == 1) {
                                    if ($rFileId != 0) {
                                        # Check previous regions to find one
                                        # that describes lines of the function's
                                        # source file.
                                        my $rIdx = $regionIdx - 1;
                                        $fileId = $rFileId;
                                        while ($fileId != 0 && $rIdx >= 0) {
                                            ($rStartLine, $rStartCol,
                                             $rEndLine, $rEndCol,
                                             $rCount, $rFileId,
                                             $rExpandedId, $rKind
                                            ) = @{$regions->[$rIdx]};
                                            $fileId = $rFileId
                                                if ($rExpandedId == $fileId &&
                                                    $rKind == 1);
                                            --$rIdx;
                                        }
                                    }
                                    # attribute the branch to the start of
                                    # the region that was found
                                    ($line, $col) = ($rStartLine, $rStartCol);
                                    last;
                                }
                                ++$regionIdx;
                            }
                        }
                        # restore the branch's own fileId (it may have been
                        # overwritten by the region search above)
                        $fileId = $branch->[6];
                        # Consider only branches of "MCDCBranchRegion" kind.
                        if ($mcdc &&
                            $kind == 6 &&
                            !defined($expanded_mcdcBranches{$fileId})) {
                            if ($fileId &&
                                scalar(@mcdcBranches) &&
                                $fileId == $mcdcBranches[-1]->[0]) {
                                # second branch with the same non-zero
                                # fileId: drop the stored entry and remember
                                # only the line for that fileId
                                pop(@mcdcBranches);
                                $expanded_mcdcBranches{$fileId} = $line;
                            } else {
                                push(@mcdcBranches,
                                     [$fileId, $line, $col, $expr]);
                            }
                        }

                        if ($lcovutil::br_coverage) {
                            # Processed branch on the same line doesn't have to be the previous.
                            my $brEntry = $funcBranchData->value($line);
                            my $branchIdx =
                                !defined($brEntry) ? 0 :
                                scalar(@{$brEntry->getBlock(0)});
                            # each LLVM branch yields two LCOV branch
                            # entries: the 'true' and the 'false' outcome
                            my $br =
                                BranchBlock->new($branchIdx, $trueCount,
                                                 !defined($expr) ? $branchIdx :
                                                     "(" . $expr . ") == True");
                            $funcBranchData->append($line, 0, $br, $filename);

                            ++$branchIdx;
                            $br =
                                BranchBlock->new($branchIdx, $falseCount,
                                                !defined($expr) ? $branchIdx :
                                                    "(" . $expr . ") == False");
                            $funcBranchData->append($line, 0, $br, $filename);
                        }
                    }
                    $branchData->union($funcBranchData)
                        if $lcovutil::br_coverage;
                }
                if ($mcdc) {
                    foreach my $m (@$mcdc) {
                        die("unexpected MC/DC data") unless scalar(@$m) == 10;
                        my ($line, $col, $endLine, $endCol,
                            $trueCount, $falseCount, $fileId, $expandedId,
                            $kind, $cov) = @$m;
                        die("unexpected MC/DC cov")
                            unless 'ARRAY' eq ref($cov);
                        my $expr;
                        my @brExprs;
                        if ($fileId == $expandedId) {
                            # collect the MC/DC branches which start inside
                            # the source range of this decision
                            foreach my $branch (@mcdcBranches) {
                                my ($brFileId, $brLine, $brCol, $brExpr) =
                                    @$branch;
                                if (($brLine > $line ||
                                     ($brLine == $line && $brCol >= $col))
                                    &&
                                    ($brLine < $endLine ||
                                     ($brLine == $endLine && $brCol <= $endCol))
                                ) {
                                    push(@brExprs, [$brLine, $brCol, $brExpr]);
                                }
                            }
                            # order by source position (line, then column)
                            @brExprs = sort {$a->[0] <=> $b->[0] ||
                                             $a->[1] <=> $b->[1]
                                            } @brExprs;
                            $expr =
                                $srcReader->getExpr($line, $col,
                                                    $endLine, $endCol)
                                if $srcReader->notEmpty();
                        } else {
                            # use the line recorded for this fileId while
                            # processing the branches above
                            $line = $expanded_mcdcBranches{$fileId};
                        }
                        my $current_mcdc =
                            $mcdcData->new_mcdc($mcdcData, $line);
                        my $groupSize = scalar(@$cov);
                        my $idx       = 0;
                        foreach my $c (@$cov) {
                            my $brExpr =
                                ($fileId == $expandedId &&
                                 $idx < scalar(@brExprs)) ?
                                $brExprs[$idx]->[2] :
                                undef;
                            # fall back to the condition index if no source
                            # text is available
                            my $fullExpr = defined($brExpr) &&
                                defined($expr) ? "'$brExpr' in '$expr'" : $idx;
                            $current_mcdc->insertExpr($filename, $groupSize, 0,
                                                      $c, $idx, $fullExpr);
                            $current_mcdc->insertExpr($filename, $groupSize, 1,
                                                      $c, $idx, $fullExpr);
                            ++$idx;
                        }
                        $mcdcData->close_mcdcBlock($current_mcdc);
                    }
                }
                # branch data was only needed to derive MC/DC expressions:
                # drop it again if the user did not ask for branch coverage
                $info->testbr()->remove($testname)
                    if $mcdc && !$lcovutil::br_coverage;
            }
        }
        lcovutil::info(2, "finished $jsonFile\n");
    }
    # now create the merge summary...
    foreach my $filename ($top->files()) {
        my $info = $top->data($filename);

        # pairs of (summary object, per-testcase container)
        my @work;
        push(@work, [$info->sum(), $info->test]);
        push(@work, [$info->sumbr(), $info->testbr])
            if $lcovutil::br_coverage;
        push(@work, [$info->func(), $info->testfnc])
            if $lcovutil::func_coverage;
        push(@work, [$info->mcdc(), $info->testcase_mcdc()])
            if $lcovutil::mcdc_coverage;

        foreach my $d (@work) {
            my ($sum, $pertest) = @$d;
            $sum->union($pertest->value($testname));
        }
    }
    return $top;
}

# Script entry point: parse the command line, convert the JSON file(s) given
# in @ARGV, write the .info file, then report the summary and coverage
# criteria.  The exit status is 1 if option parsing fails or if
# $CoverageCriteria::coverageCriteriaStatus is set, and 0 otherwise.
my $testname;
my $output_filename = 'llvm2lcov.info';
my %rc_opts;
my %opts = (
    'test-name|t=s'       => \$testname,
    'output-filename|o=s' => \$output_filename,
);

unless (lcovutil::parseOptions(\%rc_opts, \%opts, \$output_filename)) {
    print(STDERR "argparse failed\n");
    exit(1);
}

# translate, filter and write the coverage data
my $info = parse($testname, @ARGV);
$info->applyFilters(ReadCurrentSource->new());
$info->write_info_file($output_filename);

if ($lcovutil::verbose >= 0) {
    $info->print_summary();
}

# the status is read right after the criteria have been summarized
$info->checkCoverageCriteria();
CoverageCriteria::summarize();
my $exit_code = $CoverageCriteria::coverageCriteriaStatus ? 1 : 0;

lcovutil::summarize_messages();
lcovutil::cleanup_callbacks();

exit($exit_code);
