#!/usr/bin/env perl

use strict;
use warnings;

use Cwd qw(abs_path);
use File::Find qw(find);
use File::Spec;
use Getopt::Long qw(GetOptions);
use JSON::MaybeXS;
use Pod::Usage;
use Readonly;
use Time::HiRes qw(time);

use App::Test::Generator::Mutator;

=pod

=head1 NAME

test-generator-mutate - Run mutation testing against a Perl test suite

=head1 SYNOPSIS

    test-generator-mutate [options]

    test-generator-mutate --lib lib --tests t
    test-generator-mutate --file lib/My/Module.pm
    test-generator-mutate --json mutation.json
    test-generator-mutate --min-score 75

=head1 QUICK START

    test-generator-mutate --lib lib --min-score 85 --json mutation.json

=head2 Numeric Boundary Mutants

Kill Numeric Boundary Mutants first,
these are the easiest wins.
For example,
C<NUM_BOUNDARY_1295>
means something like C<if ($x > 10)> became C<if ($x >= 10)> or C<if ($x > 9)>.
If that survived, it means, there is a missing edge value.
Numeric mutations are important because they reveal missing edge coverage.
This example means line 1295.

So if that line contains something like this:

  if(((scalar keys %input) == 1) && exists($input{'type'}) && !ref($input{'type'})) {

You need to add a test where

=over 4

=item * %input contains more than one key

=item * One of them is type

=item * And behavior must be different

=back

For example, if you have a test with

  %input = ( type => 'string' )

add a test which sets

  %input = (
    type => 'string',
    something_else => 'value'
  )

=head2 Conditional Inversions

Then kill Conditional Inversions, for example, C<COND_INV_1186>,
where C<unless (-f $file)> became C<if (-f $file)>.
If that survives,
test did not assert the negative case.

Focus by file,
if one file contributes 200 survivors, that's the weakest module.

Frequently re-run.
The loop should be: add 5-10 targeted tests, re-run mutation tool, watch score climb, repeat.

=head1 DESCRIPTION

This command-line tool performs mutation testing on a Perl codebase.

It scans one or more C<.pm> files, generates code mutations using
L<App::Test::Generator::Mutator>, and runs the project's test suite
against each mutated version inside an isolated workspace.

For each generated mutant:

=over 4

=item *

The mutant is applied in a temporary workspace.

=item *

The mutated file is syntax-checked.

=item *

The test suite is executed using C<prove>.

=item *

If the tests fail, the mutant is considered I<killed>.

=item *

If the tests pass, the mutant is considered I<survived>.

=back

A mutation score is then calculated:

    (killed / total) * 100

Mutation testing measures the effectiveness of a test suite. A higher
mutation score indicates that the tests are better at detecting behavioral
changes in the code.

=head1 OPTIONS

=head2 --lib <dir>

Directory containing Perl modules to mutate.

Defaults to C<lib>.

=head2 --file <file>

Mutate a single file instead of scanning the entire C<--lib> directory.

=head2 --tests <dir>

Directory containing test files.

Defaults to C<t>.

=head2 --min-score <int>

Minimum acceptable mutation score (percentage).

If the final score is below this value, the program exits with a
non-zero status.

=head2 --json <file>

Write mutation results to the specified JSON file.

The output structure:

    {
        score    => "85.32",
        total    => 120,
        killed   => 102,
        survived => [ ... mutant IDs ... ]
    }

=head2 --cover_json <file>

The location of the file generated by C<cover -report json>.
That file is used to generate an approximation for an LCSAJ table.

=head2 --fail-fast

(Reserved for future use.)

=head2 --mutation_level <full|fast>

Setting to C<fast> removes redundant mutations and dedups mutations before running.
The default is C<full>.

=head2 --timeout <seconds>

(Reserved for future use.)

=head2 --verbose

Print progress information.

=head2 --quiet

Suppress final summary output.

=head1 EXIT CODES

=over 4

=item = 0

Success and mutation score meets minimum threshold.

=item = 1

Mutation score below C<--min-score>.

=item = 2

Baseline test suite failed before mutation testing began.

=item = 3

Invalid command-line options.

=back

=head1 WORKFLOW

The tool performs the following steps:

=over 4

=item 1.

Collect target files (either a single file or all C<.pm> files under C<--lib>).

=item 2.

Run baseline tests to ensure the suite passes before mutation.

=item 3.

Generate mutants for each file.

=item 4.

Apply each mutant in isolation and re-run the test suite.

=item 5.

Calculate and report mutation statistics.

=back

=encoding utf-8

=head1 WORKFLOW DIAGRAM

The mutation testing process follows this execution flow:

    ┌───────────────────────────────┐
    │ Start                         │
    └───────────────┬───────────────┘
                    │
                    ▼
    ┌───────────────────────────────┐
    │ Collect Target Files          │
    │  --file OR scan --lib/*.pm    │
    └───────────────┬───────────────┘
                    │
                    ▼
    ┌───────────────────────────────┐
    │ Run Baseline Tests            │
    │ prove -l t                    │
    └───────────────┬───────────────┘
                    │
         Baseline OK? ── No ──► Exit (code 2)
                    │
                   Yes
                    │
                    ▼
    ┌───────────────────────────────┐
    │ For Each File                │
    └───────────────┬───────────────┘
                    │
                    ▼
    ┌───────────────────────────────┐
    │ Generate Mutants             │
    │ (conditional flips, etc.)    │
    └───────────────┬───────────────┘
                    │
                    ▼
        ┌───────────────────────────────┐
        │ For Each Mutant              │
        └───────────────┬───────────────┘
                        │
                        ▼
        ┌───────────────────────────────┐
        │ Prepare Workspace            │
        │ (isolated temp directory)    │
        └───────────────┬───────────────┘
                        │
                        ▼
        ┌───────────────────────────────┐
        │ Apply Mutant                 │
        └───────────────┬───────────────┘
                        │
                        ▼
        ┌───────────────────────────────┐
        │ Syntax Check                 │
        │ perl -c mutated_file.pm      │
        └───────────────┬───────────────┘
                        │
              Compiles? ── No ──► Skip Mutant
                        │
                       Yes
                        │
                        ▼
        ┌───────────────────────────────┐
        │ Run Test Suite               │
        │ prove t                      │
        └───────────────┬───────────────┘
                        │
          Tests Fail? ── Yes ──► Killed++
                        │
                       No
                        │
                        ▼
                   Survived++
                        │
                        ▼
        ┌───────────────────────────────┐
        │ Repeat for Next Mutant       │
        └───────────────┬───────────────┘
                        │
                        ▼
    ┌───────────────────────────────┐
    │ Calculate Mutation Score      │
    │ (killed / total) * 100        │
    └───────────────┬───────────────┘
                    │
                    ▼
    ┌───────────────────────────────┐
    │ Print Report / Write JSON     │
    └───────────────┬───────────────┘
                    │
                    ▼
                 Finish

=cut

# --------------------------------------------------
# Difficulty label strings and priority scores.
# TODO: move to App::Test::Generator::Constants once
# that module is created, so they can be shared with
# SchemaExtractor and other consumers.
# --------------------------------------------------
Readonly my $DIFFICULTY_HIGH_LABEL   => 'HIGH';
Readonly my $DIFFICULTY_MEDIUM_LABEL => 'MEDIUM';
Readonly my $DIFFICULTY_LOW_LABEL    => 'LOW';

Readonly my $DIFFICULTY_HIGH_SCORE   => 3;
Readonly my $DIFFICULTY_MEDIUM_SCORE => 2;
Readonly my $DIFFICULTY_LOW_SCORE    => 1;

# --------------------------------------------------
# Status strings written into the JSON output and
# used by test-generator-index to colour-code the
# mutation heatmap.
# TODO: move to App::Test::Generator::Constants.
# --------------------------------------------------
Readonly my $STATUS_SURVIVED => 'Survived';
Readonly my $STATUS_KILLED   => 'Killed';

# --------------------------------------------------
# Mutation level strings accepted by --mutation_level.
# 'full' runs every mutant independently.
# 'fast' deduplicates mutants by site before running.
# --------------------------------------------------
Readonly my $MUTATION_LEVEL_FULL => 'full';
Readonly my $MUTATION_LEVEL_FAST => 'fast';

my %opt = (
	lib            => 'lib',
	tests          => 't',
	min_score      => 0,
	fail_fast      => 0,
	verbose        => 0,
	quiet          => 0,
	man            => 0,
	help           => 0,
	mutation_level => $MUTATION_LEVEL_FULL,
	lcsaj_root     => 'lcsaj',
	lcsaj_hits     => 'cover_html/lcsaj_hits.json',
);

# Exit code 3 is "invalid command-line options" per the EXIT CODES POD section
GetOptions(
	'lib=s'            => \$opt{lib},
	'file=s'           => \$opt{file},
	'tests=s'          => \$opt{tests},
	'min-score=i'      => \$opt{min_score},
	'cover_json=s'     => \$opt{cover_json},
	'json=s'           => \$opt{json},
	'fail-fast'        => \$opt{fail_fast},
	'timeout=i'        => \$opt{timeout},
	'verbose'          => \$opt{verbose},
	'quiet'            => \$opt{quiet},
	'help|h'           => \$opt{help},
	'mutation_level=s' => \$opt{mutation_level},
	'man|m'            => \$opt{man},
	'lcsaj_root=s'     => \$opt{lcsaj_root},
	'lcsaj_hits=s'     => \$opt{lcsaj_hits},
) or pod2usage(3);

pod2usage(-exitval => 0, -verbose => 1) if $opt{help};
pod2usage(-exitval => 0, -verbose => 2) if $opt{man};

# Warn when reserved-for-future-use options are supplied so the
# caller knows they have no effect yet rather than silently ignoring
warn "--fail-fast is reserved for future use and has no effect\n"
	if $opt{fail_fast};
warn "--timeout is reserved for future use and has no effect\n"
	if $opt{timeout};

# -------------------------
# Collect Files
# -------------------------

my @files;

if($opt{file}) {
	push @files, $opt{file};
} else {
	find(
		sub {
			push @files, $File::Find::name if /\.pm$/;
		},
		$opt{lib}
	);
}

# Pass absolute target file paths to the LCSAJ runtime debugger
# via environment variable so it knows which files to instrument
$ENV{LCSAJ_TARGETS} = join(':', map { abs_path($_) } @files);

# -------------------------
# Verify baseline tests
# -------------------------

print "Running baseline tests...\n" if $opt{verbose};

# Use list form of system() to avoid shell interpolation of $opt{tests},
# which could be a security risk if the path contains special characters
if(system('prove', '-Mblib', '-l', $opt{tests}) != 0) {
	print STDERR "Baseline tests failed.\n";
	exit 2;
}

# -------------------------
# Run Mutation Testing
# -------------------------

my $total   = 0;
my $killed  = 0;
my @survivors;
my @killed_mutants;

for my $file (@files) {
	my $mutator = App::Test::Generator::Mutator->new(
		file           => $file,
		lib_dir        => $opt{lib},
		mutation_level => $opt{mutation_level},
	);

	my @mutants = $mutator->generate_mutants();

	my $groups;

	if($opt{mutation_level} eq $MUTATION_LEVEL_FAST) {
		# Fast mode: deduplicate mutants by mutation site before
		# running to avoid redundant test runs
		$groups = group_mutants($file, \@mutants);
	} else {
		# Full mode: run every mutant independently
		$groups = { map { $_->id => [$_] } @mutants };
	}

	for my $group (values %{$groups}) {
		my $mutant = representative_mutant($group);

		print 'Testing representative mutant ',
			$mutant->id,
			" ($file line ",
			$mutant->line,
			') representing ',
			scalar(@{$group}),
			" mutants\n" if $opt{verbose};

		my $workspace = $mutator->prepare_workspace();

		$mutator->apply_mutant($mutant);

		# Point PERL5LIB at the workspace so prove picks up the
		# mutated module rather than the original in blib/ or lib/
		local $ENV{PERL5LIB} = File::Spec->catfile($workspace, $opt{lib});

		my $compile = system($^X, '-c', File::Spec->catfile($workspace, $file));
		next if $compile != 0;

		# Clear PERL5OPT to prevent -Ilib or other flags from
		# overriding the workspace PERL5LIB — if PERL5OPT contains
		# -Ilib, prove will load modules from the project's own lib/
		# instead of the mutated workspace, causing all mutants to
		# survive regardless of the mutation applied
		local $ENV{PERL5OPT} = '';

		my $survived = (system('prove', $opt{tests}) == 0);

		if($survived) {
			# Representative survived — mark the entire group as survived
			for my $m (@{$group}) {
				my $difficulty = mutation_difficulty($m);

				push @survivors, {
					id          => $m->id(),
					line        => $m->line(),
					file        => $file,
					description => $m->description(),
					status      => $STATUS_SURVIVED,
					difficulty  => $difficulty->{label},
					priority    => $difficulty->{score},
					hint        => $difficulty->{hint},
				};

				$total++;
			}
		} else {
			# Representative killed — assume the whole group is killed
			for my $m (@{$group}) {
				push @killed_mutants, {
					id          => $m->id(),
					line        => $m->line(),
					file        => $file,
					description => $m->description(),
					status      => $STATUS_KILLED,
				};

				$total++;
				$killed++;
			}
		}

		# workspace auto-destroyed when $workspace goes out of scope
	}
}

# -------------------------
# Report
# -------------------------

my $score = $total ? sprintf('%.2f', ($killed / $total) * 100) : 100;

unless($opt{quiet}) {
	print "\nMutation Score: $score%\n",
		"Total: $total\n",
		"Killed: $killed\n",
		'Survived: ', scalar(@survivors), "\n";
}

if($opt{json}) {
	# Sort survivors worst-first so the most impactful gaps
	# appear at the top of the generated test stub file
	@survivors = sort { $b->{priority} <=> $a->{priority} } @survivors;

	open(my $fh, '>', $opt{json}) or die "Cannot write $opt{json}: $!";

	print $fh encode_json({
		score          => $score,
		total          => $total,
		killed_count   => $killed,
		survived_count => scalar(@survivors),
		killed         => \@killed_mutants,
		survived       => \@survivors,
	});

	close $fh;
}

if($score < $opt{min_score}) {
	print STDERR "score $score is less than min_score $opt{min_score}\n";
	exit 1;
}

exit 0;

# --------------------------------------------------
# group_mutants
#
# Purpose:    Group mutants that affect the same
#             mutation site (file, line, and original
#             operator) so that only one representative
#             mutant needs to be tested per site in
#             fast mode.
#
# Entry:      $file    - path to the source file being
#                        mutated.
#             $mutants - arrayref of mutant objects as
#                        returned by generate_mutants().
#
# Exit:       Returns a hashref keyed by site key
#             (file:line:operator), where each value
#             is an arrayref of mutant objects at that
#             site.
#
# Side effects: None.
#
# Notes:      The site key uses the original token
#             (operator or keyword being mutated) with
#             whitespace stripped, so that formatting
#             differences do not create spurious groups.
#             Used only when --mutation_level=fast.
# --------------------------------------------------
sub group_mutants {
	my ($file, $mutants) = @_;

	my %groups;

	for my $m (@{$mutants}) {
		my $line = $m->line;

		# Original token being mutated (e.g. >, ==, !)
		my $orig = defined($m->original) ? $m->original : '';

		# Strip whitespace so formatting differences don't
		# create spurious distinct groups for the same site
		$orig =~ s/\s+//g;

		# Group key identifies a unique mutation site
		my $key = join(':', $file, $line, $orig);

		push @{$groups{$key}}, $m;
	}

	return \%groups;
}

# --------------------------------------------------
# representative_mutant
#
# Purpose:    Select a single mutant from a group to
#             stand in for all mutants at that site
#             when running in fast mode. If the
#             representative is killed, the whole
#             group is assumed killed; if it survives,
#             the whole group is assumed to survive.
#
# Entry:      $group - arrayref of mutant objects at
#             a single mutation site.
#
# Exit:       Returns a single mutant object. Never
#             returns undef (falls back to the first
#             element of the group).
#
# Side effects: None.
#
# Notes:      Prefers boundary-type mutations when
#             available since they probe the most
#             meaningful behavioural edge. Falls back
#             to the first mutant in the group if no
#             boundary mutant is present.
# --------------------------------------------------
sub representative_mutant {
	my $group = $_[0];

	for my $m (@{$group}) {
		# Prefer boundary-type mutations — they are the most
		# informative representatives since they probe numeric
		# edge cases that other mutation types may not exercise
		if($m->id() && $m->id() =~ /BOUNDARY|NUM/) {
			return $m;
		}
	}

	# Fallback: use the first mutant in the group
	return $group->[0];
}

# --------------------------------------------------
# mutation_difficulty
#
# Purpose:    Assign a difficulty label, priority
#             score, and hint string to a surviving
#             mutant, reflecting how meaningful it
#             would be to kill it and what kind of
#             test is likely to do so.
#
# Entry:      $mutant - a mutant object with an id()
#             method returning the mutant ID string.
#
# Exit:       Returns a hashref with three keys:
#               score - numeric priority (higher is
#                       more important to kill)
#               label - one of HIGH, MEDIUM, LOW
#               hint  - human-readable advice on what
#                       test to write
#
# Side effects: None.
#
# Notes:      Classification is based on the mutant
#             ID prefix, which encodes the mutation
#             type. This is a heuristic — the same
#             type of mutation may be more or less
#             impactful depending on context.
#
#             The label and score constants are
#             candidates for extraction to
#             App::Test::Generator::Constants once
#             that module is created.
# --------------------------------------------------
sub mutation_difficulty {
	my $mutant = $_[0];

	my $id = $mutant->id // '';

	# Numeric boundary mutants expose missing edge-case tests —
	# the highest priority since they reveal concrete value gaps
	if($id =~ /BOUNDARY|NUM/) {
		return {
			score => $DIFFICULTY_HIGH_SCORE,
			label => $DIFFICULTY_HIGH_LABEL,
			hint  => 'Likely missing edge-case test (boundary value)',
		};
	}

	# Conditional inversions reveal missing negative assertions —
	# medium priority since one branch was left untested
	if($id =~ /COND|INV|NEG/) {
		return {
			score => $DIFFICULTY_MEDIUM_SCORE,
			label => $DIFFICULTY_MEDIUM_LABEL,
			hint  => 'Add tests asserting both true and false outcomes',
		};
	}

	# Logical operator changes reveal missing combination tests
	if($id =~ /LOGIC|AND|OR/) {
		return {
			score => $DIFFICULTY_MEDIUM_SCORE,
			label => $DIFFICULTY_MEDIUM_LABEL,
			hint  => 'Test combinations where only one logical operand is true',
		};
	}

	# Default: minor mutation with limited behavioural impact
	return {
		score => $DIFFICULTY_LOW_SCORE,
		label => $DIFFICULTY_LOW_LABEL,
		hint  => 'Mutation survived but impact may be minor',
	};
}

=head1 AUTHOR

Nigel Horne

=cut
