# spelling -- lintian check script -*- perl -*-

# Look for common spelling errors in the package description, the
# copyright file, and README.Debian.

# Copyright (C) 1998 Richard Braakman
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package Lintian::spelling;
use strict;
use Tags;

# All spelling errors that have been observed "in the wild" in package
# descriptions are added here, on the grounds that if they occurred
# once they are more likely to occur again.

# Misspellings of "compatibility", "separate", and "similar" are 
# particularly common.

# Be careful with corrections that involve punctuation, since the check
# is a bit rough with punctuation.  For example, I had to delete the
# correction of "builtin" to "built-in".

# Table mapping a misspelled word to its suggested correction.  Keys are
# looked up by spelling_check() after the candidate word has been
# lowercased and stripped of punctuation, so every key must be lowercase.
#
# NOTE: qw() treats a backslash literally unless it precedes the
# delimiter or another backslash, so apostrophes must be written bare
# (Debian's, don't); writing \' would leave a stray backslash in the
# reported correction.
my %corrections = qw(
		     accesnt accent
		     accelleration acceleration
		     accessable accessible
		     accomodate accommodate
		     acess access
		     acording according
		     additionaly additionally
		     adress address
		     adresses addresses
		     adviced advised
		     albumns albums
		     alegorical allegorical
		     algorith algorithm
		     allpication application
		     altough although
		     alows allows
		     amoung among
		     amout amount
		     analysator analyzer
		     ang and
		     appropiate appropriate
		     arraival arrival
		     artifical artificial
		     artillary artillery
		     attemps attempts
		     authentification authentication
		     automaticly automatically
		     automatize automate
		     automatized automated
		     automatizes automates
		     auxilliary auxiliary
		     availavility availability
		     availble available
		     avaliable available
		     availiable available
		     backgroud background
		     baloons balloons
		     becomming becoming
		     becuase because
		     calender calendar
		     cariage carriage
		     challanges challenges
		     changable changeable
		     charachters characters
		     charcter character
		     choosen chosen
		     colorfull colorful
		     comand command
		     commerical commercial
		     comminucation communication
		     commoditiy commodity
		     compability compatibility
		     compatability compatibility
		     compatable compatible
		     compatibiliy compatibility
		     compatibilty compatibility
		     compleatly completely
		     complient compliant
		     compres compress
		     containes contains
		     containts contains
		     contence contents
		     continous continuous
		     contraints constraints
		     convertor converter
		     convinient convenient
		     cryptocraphic cryptographic
		     deamon daemon
		     debain Debian
		     debians Debian's
		     decompres decompress
		     definate definite
		     definately definitely
		     dependancies dependencies
		     dependancy dependency
		     dependant dependent
		     developement development
		     developped developed
		     deveolpment development
		     devided divided
		     dictionnary dictionary
		     diplay display
		     disapeared disappeared
		     dissapears disappears
		     documentaion documentation
		     docuentation documentation
		     documantation documentation
		     dont don't
		     easilly easily
		     ecspecially especially
		     edditable editable
		     editting editing
		     eletronic electronic
		     enchanced enhanced
		     encorporating incorporating
		     enlightnment enlightenment
		     enterily entirely
		     enviroiment environment
		     environement environment
		     excellant excellent
		     exlcude exclude
		     exprimental experimental
		     extention extension
		     failuer failure
		     familar familiar
		     fatser faster
		     fetaures features
		     forse force
		     fortan fortran
		     framwork framework
		     fuction function
		     fuctions functions
		     functionnality functionality
		     functonality functionality
		     functionaly functionally
		     futhermore furthermore
		     generiously generously
		     grahical graphical
		     grahpical graphical
		     grapic graphic
		     guage gauge
		     halfs halves
		     heirarchically hierarchically
		     helpfull helpful
		     hierachy hierarchy
		     hierarchie hierarchy
		     howver however
		     implemantation implementation
		     incomming incoming
		     incompatabilities incompatibilities
		     indended intended
		     indendation indentation
		     independant independent
		     informatiom information
		     initalize initialize
		     inofficial unofficial
		     integreated integrated
		     integrety integrity
		     integrey integrity
		     intendet intended
		     interchangable interchangeable
		     intermittant intermittent
		     jave java
		     langage language
		     langauage language
		     langugage language
		     lauch launch
		     lesstiff lesstif
		     libaries libraries
		     libary library
		     licenceing licencing
		     loggin login
		     logile logfile
		     loggging logging
		     maintainance maintenance
		     maintainence maintenance
		     makeing making
		     managable manageable
		     manoeuvering maneuvering
		     mathimatic mathematic
		     mathimatics mathematics
		     mathimatical mathematical
		     ment meant
		     modulues modules
		     monochromo monochrome
		     multidimensionnal multidimensional
		     navagating navigating
		     nead need
		     neccesary necessary
		     neccessary necessary
		     necesary necessary
		     nescessary necessary
		     noticable noticeable
		     optionnal optional
		     orientatied orientated
		     orientied oriented
		     pacakge package
		     pachage package
		     packacge package
		     packege package
		     packge package
		     pakage package
		     particularily particularly
		     persistant persistent
		     plattform platform
		     ploting plotting
		     protable portable
		     posible possible
		     powerfull powerful
		     prefered preferred
		     prefferably preferably
		     prepaired prepared
		     princliple principle
		     priorty priority
		     proccesors processors
		     proces process
		     processsing processing
		     processessing processing
		     progams programs
		     programers programmers
		     programm program
		     programms programs
		     promps prompts
		     pronnounced pronounced
		     prononciation pronunciation
		     pronouce pronounce
		     protcol protocol
		     protocoll protocol
		     recieve receive
		     recieved received
		     redircet redirect
		     regulamentations regulations
		     remoote remote
		     repectively respectively
		     replacments replacements
		     requiere require
		     runnning running
		     safly safely
		     savable saveable
		     searchs searches
		     separatly separately
		     seperate separate
		     seperated separated
		     seperately separately
		     seperatly separately
		     serveral several
		     setts sets
		     similiar similar
		     simliar similar
		     speach speech
		     splitted split
		     standart standard
		     staically statically
		     staticly statically
		     succesful successful
		     succesfully successfully
		     suplied supplied
		     suport support
		     suppport support
		     supportin supporting
		     synchonized synchronized
		     syncronize synchronize
		     syncronizing synchronizing
		     syncronus synchronous
		     syste system
		     sythesis synthesis
		     taht that
		     throught through
		     useable usable
		     usefull useful
		     usera users
		     usetnet Usenet
		     utilites utilities
		     utillities utilities
		     utilties utilities
		     utiltity utility
		     utitlty utility
		     variantions variations
		     varient variant
		     verson version
		     vicefersa vice-versa
		     yur your
		     wheter whether
		     wierd weird
		     xwindows X
		    );
# The qw() format above doesn't allow spaces inside a correction, so
# multi-word corrections are added individually.
$corrections{'alot'} = 'a lot';

# Language names that should be capitalised.  spelling_check() consults
# this table with the word exactly as written (before lowercasing), so
# only the all-lowercase spellings listed here are reported.
my %corrections_language_names = (
    english => 'English',
    french  => 'French',
    german  => 'German',
    russian => 'Russian',
);

# Run the spelling check over the files listed below, which are opened
# relative to the current directory.
#
# Arguments: $pkg and $type are accepted from the caller but are not
# used by this check.
#
# Each file that can be opened is read in one gulp and handed to
# spelling_check() together with the tag to emit for errors found in it.
# Files that cannot be opened are silently skipped.
sub run {
    my ($pkg, $type) = @_;

    # Read in entire files at one gulp.  This is deliberately localised
    # for the whole sub (not just the read) so the dynamic scope matches
    # what the code called from here has always seen.
    local $/ = undef;

    # [ file to read, tag to emit ] -- processed in this order.
    my @checks = (
        [ 'fields/description', 'spelling-error-in-description' ],
        [ 'copyright',          'spelling-error-in-copyright' ],
        [ 'README.Debian',      'spelling-error-in-readme-debian' ],
        # Disabled:
        # [ 'changelog.Debian', 'spelling-error-in-debian-changelog' ],
    );

    for my $check (@checks) {
        my ($file, $tag) = @$check;

        # Three-argument open with a lexical handle: no global bareword
        # filehandles and no mode parsing of the file name.
        open(my $fh, '<', $file) or next;
        my $text = <$fh>;
        close($fh);

        # Check defined(), because the read returns the undefined value
        # if the file is length 0.
        spelling_check($tag, $text) if defined($text);
    }

    return;
}

# -----------------------------------

# Emit tag $tag via tag(), using the remaining arguments joined with
# single spaces as the tag message.
#
# Arguments:
#   $tag  - name of the tag to emit
#   @args - message parts (may be empty, giving an empty message)
#
# Returns whatever tag() returns.
sub tag_error {
    my ($tag, @args) = @_;

    # We can't have newlines in a tag message, so turn every one of them
    # into a literal \n.  The /g matters: without it only the first
    # newline of each argument would be escaped.  @args is a copy, so
    # the caller's values are left untouched.
    s,\n,\\n,g for @args;

    # join() of an empty list is the empty string, which covers the
    # "no message" case without a separate branch.
    return tag($tag, join(' ', @args));
}

# Scan the text in $file (the file's contents, not its name) for known
# misspellings and report each one through tag_error() under tag $tag.
#
# Arguments:
#   $tag  - tag name to report errors under
#   $file - the full text to check
sub spelling_check {
    my ($tag, $file) = @_;

    for my $raw (split(/\s+/, $file)) {
        # Language names are matched against the word exactly as
        # written, so that only non-capitalised spellings are reported.
        tag_error($tag, $raw, $corrections_language_names{$raw})
            if exists $corrections_language_names{$raw};

        # Normalise the word for the main lookup: lowercase it and drop
        # the non-alphabetic parts.  Apostrophes are special: they are
        # only removed at the very beginning or end of the word.
        my $word = lc $raw;
        $word =~ s/(^\')|[^\w\xc0-\xd6\xd8-\xf6\xf8-\xff\']+|(\'$)//g;

        next unless exists $corrections{$word};
        tag_error($tag, $word, $corrections{$word});
    }

    # Special case for correcting a multi-word string, which the
    # word-by-word loop above cannot see:
    # $corrections{'Debian/GNU Linux'} = 'Debian GNU/Linux';
    tag_error($tag, "Debian/GNU Linux", "Debian GNU/Linux")
        if $file =~ m,Debian/GNU Linux,;
}

1;

# vim: syntax=perl
