Her er selve Perl-scriptet
****************
#!/usr/bin/perl
# Intermediate Search, Version 1.1
# Copyright 1997 by Fluid Dynamics <xav.com>
# You are free to use the script, but please ask before you
# distribute it.
#
# For latest version and help files, visit:
#
#   http://www.xav.com/scripts/search/
# __________________________________________________________________
#
# CONFIGURATION
#
# Every setting below is a plain package variable; the subroutines
# further down read them directly.

# The directory location of all your files. Remember the trailing
# slash.
$basedir = '/kunder/weblightdk/htdocs/';

# The URL corresponding to the base directory.
$baseurl = 'http://weblight.dk/';

# This array holds info on all the directories and filetypes you'd like
# your visitors to search. These are all the files that will be listed.
# The asterisk is a wildcard - it will list all files and directories.
# Paths are relative to $basedir. Visit the readme file for more
# customizing information.
@files = ('opskriften/hjem/*/*.htm',
          'opskriften/hjem/*/*/*.htm',
          'opskriften/hjem/*/*/*/*.htm',
          'opskriften/hjem/*/*/*/*/*.htm');

# The search log. Make this file writable by the web server and hide it
# well! It holds the results of everybody's searches so you'll know what
# people are really looking for when they come to your site. Keep it in
# a non-web directory (or a hidden or secure one) so others can't see it.
$summary_file = '/summaries.html';

# Enter the URL and title of your main web page.
$link_url = '';
$link_title = '';

# If your visitors can't handle JavaScript, better set this to 'off'.
$java_toys = 'on';

# The URL of the E3 picture.
$searchpict = 'http://www.weblight.dk/temp/search.gif';

# Change this to the full URL only if you rename this script.
$cgi_url = 'search.cgi';

# Options for Weighted Search:
#
# All occurrences of a search term count as one point. The occurrence
# of a term in the filename, title, META keywords, or META description
# can have added weight (equivalent to a multiplier per hit). Enter
# the multipliers in the array below - the defaults are (2,2,4,2). If
# this makes no sense to you, just ignore it and leave the defaults as
# they are - they work pretty well. Note that this will give extra
# weight to those pages that have a properly formatted title and META
# tags, even if they contain the same basic information.
($name_x, $title_x, $keywords_x, $description_x) = (2,2,4,2);

# No further editing is necessary, but feel free to play around...
# Note that much of the code below is straight HTML, and very easy to
# modify if you know a little about HTML programming.
#
# __________________________________________________________________
# Decode the submitted form into %FORM.
# A request body (POST) is read from STDIN when CONTENT_LENGTH is set;
# otherwise the query string (GET) is used, so links such as
# search.cgi?terms=kage work as well.
$buffer = '';
if ($ENV{'CONTENT_LENGTH'})
{
    read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
}
elsif (defined $ENV{'QUERY_STRING'})
{
    $buffer = $ENV{'QUERY_STRING'};
}
foreach $pair (split(/[&;]/, $buffer))
{
    # Limit the split to two fields so an unencoded '=' inside the
    # value is not thrown away.
    ($name, $value) = split(/=/, $pair, 2);
    next unless (defined $name && $name ne '');
    $value = '' unless (defined $value);
    # Field names are URL-encoded exactly like values, so decode both.
    foreach ($name, $value)
    {
        tr/+/ /;
        s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    }
    $FORM{$name} = $value;
}
# Run a search when the visitor typed something (including "0");
# otherwise show the empty search form.
if (defined $FORM{'terms'} && $FORM{'terms'} =~ /\S/)
{
    get_files();
    search();
    return_html();
}
else
{
    prompt();
}
sub prompt
{
    # Prints the empty search form as a complete HTML page on STDOUT.
    # Reads $cgi_url (the form's ACTION) and $java_toys (when 'on', a
    # small script puts the keyboard focus into the search field).
    print "Content-type: text/html\n\n";
    print <<EOM;
<HTML>
<HEAD>
<TITLE></TITLE>
</HEAD>
<body background="http://weblight.dk/opskriften/images/baggrunde/visningbag.gif" bgcolor="#FFFFFF" topmargin="0" link="#000000" vlink="#000000" alink="#FF0000">
<table border="0" width="420" cellspacing="0" cellpadding="0">
<tr>
<td>
<BR><BLOCKQUOTE>
<FORM METHOD=POST ACTION="$cgi_url" NAME="searchform">
<INPUT TYPE=TEXT NAME="terms" SIZE=30>
<INPUT TYPE=SUBMIT VALUE="Find det!"><BR>
</FORM>
</BLOCKQUOTE>
EOM
    if ($java_toys eq 'on')
    {
        print "<SCRIPT LANGUAGE=\"JavaScript\">\n";
        print "<!-- script hiding...\n";
        print "document.searchform.terms.focus();\n";
        print "// End hiding -->\n";
        print "</SCRIPT>\n";
    }
    # Close everything that was opened above so the page is well formed.
    print <<EOM;
<A NAME="tips"></A>
</td>
</tr>
</table>
</BODY>
</HTML>
EOM
}
sub get_files
{
    # Fills @FILES with every text file selected by the patterns in
    # @files. Changes the working directory to $basedir, so the stored
    # paths are relative to it.
    #
    # Each entry of @files is either a glob pattern ('recipes/*/*.htm')
    # or the name of a directory, whose direct entries are then listed.
    # Calls bad_base (which exits) when $basedir is missing or cannot be
    # entered.
    #
    # Matching is done with Perl's own glob and readdir instead of
    # running `ls` through a shell, so file names containing spaces
    # survive intact.
    bad_base() unless (-e $basedir);
    chdir($basedir) or bad_base();
    @FILES = ();
    foreach my $pattern (@files)
    {
        if (-d $pattern)
        {
            # A plain directory: list its visible entries, sorted the
            # way `ls` would show them.
            my $prefix = $pattern;
            $prefix .= '/' unless ($prefix =~ m{/$});
            my $dh;
            next unless (opendir($dh, $pattern));
            my @entries = sort grep { !/^\./ } readdir($dh);
            closedir($dh);
            foreach my $entry (@entries)
            {
                my $path = "$prefix$entry";
                push(@FILES, $path) if (-f $path && -T $path);
            }
        }
        else
        {
            # A wildcard pattern: keep the matches that are text files.
            foreach my $match (glob($pattern))
            {
                push(@FILES, $match) if (-f $match && -T $match);
            }
        }
    }
}
sub search
{
    # Parses the visitor's query in $FORM{'terms'}, scans every file in
    # @FILES and records which ones match.
    #
    # Reads:  $FORM{'terms'}, @FILES, $baseurl and the weights $name_x,
    #         $title_x, $keywords_x, $description_x.
    # Writes: @optional, @required, @forbidden (the parsed terms, stored
    #         as '\Wterm\W' for whole-word terms and as plain 'term' for
    #         substring terms - the form return_html displays),
    #         $required_terms_present, $forbidden_terms_present, $check,
    #         %titles, %description, %relevance, %include, %HITS and
    #         $hitcount.
    #
    # Query syntax: words are optional by default; "+word" or
    # "and word" is required; "-word" or "not word" is forbidden;
    # "or" is ignored; "quoted words" form one phrase; a leading '$'
    # turns a whole-word term into a substring term. A term containing
    # A-Z is matched case-sensitively, any other term case-insensitively.
    # A '*' anywhere in the query disables matching and lists every file.
    #
    # Terms are always matched literally: they are escaped with
    # quotemeta before being compiled, so punctuation typed by a visitor
    # can neither break the pattern nor act as a regular expression.

    # Start from a clean slate so nothing leaks in from an earlier call.
    @optional = ();
    @required = ();
    @forbidden = ();
    %HITS = ();
    %relevance = ();
    %include = ();
    $hitcount = 0;
    $required_terms_present = '';
    $forbidden_terms_present = '';
    $check = '';

    # Convert multiple blank spaces to single spaces, and pad both ends
    # so the keyword substitutions below also work at the edges:
    $FORM{'terms'} =~ s/\s+/ /g;
    $FORM{'terms'} = " $FORM{'terms'} ";
    # Convert NOT statements to minus signs:
    $FORM{'terms'} =~ s/ not / -/ig;
    # Convert AND statements to plus signs:
    $FORM{'terms'} =~ s/ and / \+/ig;
    # Strip OR statements (OR is the default):
    $FORM{'terms'} =~ s/ or / /ig;
    # Matching is only performed when the query holds no wildcard:
    $check = 'true' unless ($FORM{'terms'} =~ /\*/);

    # Correct for grouped entries: pieces at odd positions of the split
    # lie between double quotes, so their blanks are masked with a
    # placeholder and survive the split on whitespace below.
    $placeholder = '%%%==%%%';
    my $inside_quotes = 0;
    my $rebuilt = '';
    foreach my $piece (split(/\"/, $FORM{'terms'}))
    {
        $piece =~ s/ /$placeholder/g if ($inside_quotes);
        $rebuilt .= $piece;
        $inside_quotes = !$inside_quotes;
    }
    $FORM{'terms'} = $rebuilt;

    # Sort the terms into optional / required / forbidden and compile
    # one pattern per term.
    my %regex_for;
    @terms = split(/\s+/, $FORM{'terms'});
    foreach my $raw (@terms)
    {
        # Skip null entries (first and last)
        next if ($raw eq '');
        my $word = $raw;
        # Unmask grouped terms:
        $word =~ s/\Q$placeholder\E/ /g;
        my $bucket = \@optional;
        if ($word =~ s/^\+//)
        {
            $bucket = \@required;
        }
        elsif ($word =~ s/^-//)
        {
            $bucket = \@forbidden;
        }
        my $substring = ($word =~ s/^\$//) ? 1 : 0;
        # A lone '+', '-' or '$' leaves nothing to search for.
        next if ($word eq '');
        my $literal = quotemeta($word);
        # Whole-word terms must not touch a word character on either
        # side. Lookarounds (unlike \W...\W) also match at the very
        # start or end of the text and do not swallow the separator
        # between two adjacent hits.
        my $pattern = $substring ? $literal : '(?<!\w)' . $literal . '(?!\w)';
        my $stored = $substring ? $word : '\W' . $word . '\W';
        $regex_for{$stored} = ($word =~ /[A-Z]/) ? qr/$pattern/ : qr/$pattern/i;
        push(@$bucket, $stored);
    }
    $required_terms_present = "you bet" if (@required);
    $forbidden_terms_present = "fraid so" if (@forbidden);

    foreach $FILE (@FILES)
    {
        # Slurp the file; a file that cannot be read is not a hit.
        my $fh;
        next unless (open($fh, '<', $FILE));
        my $string = do { local $/; <$fh> };
        close($fh);
        $string = '' unless (defined $string);
        $string =~ s/\n/ /g;
        $relevance{$FILE} = 0;

        # Extract the title, if there is one:
        my $title = '';
        my $has_title = 0;
        if ($string =~ /<title>(.*?)<\/title>/i)
        {
            $title = $1;
            $has_title = 1;
        }
        $titles{$FILE} = $title ? $title : $FILE;

        # Extract the description, if there is one; otherwise use the
        # first 25 words of the page text (title and tags removed):
        my $meta_description;
        if ($string =~ /<meta\s+name="description"\s+content="([^"]*)"/i)
        {
            $meta_description = $1;
        }
        if (defined $meta_description)
        {
            $description{$FILE} = $meta_description;
        }
        else
        {
            my $text = $string;
            $text =~ s/<title>.*?<\/title>//ig;
            $text =~ s/<[^>]*>//g;
            my @words = grep { $_ ne '' } split(/\s+/, $text);
            splice(@words, 25) if (@words > 25);
            $description{$FILE} = join(' ', @words) . ' ...';
        }

        # Extract the keywords, if they exist:
        my $keywords = '';
        if ($string =~ /<meta\s+name="keywords"\s+content="([^"]*)"/i)
        {
            $keywords = $1;
        }

        # Build the text that is actually searched. Weighting works by
        # appending extra copies of the favoured fields; each copy is
        # preceded by a blank so its first word is not glued to the word
        # before it. The title already occurs once in the page text,
        # hence one copy less.
        my $extra_titles = $has_title ? $title_x - 1 : 0;
        $extra_titles = 0 if ($extra_titles < 0);
        my $haystack = $string;
        $haystack .= " $title" x $extra_titles;
        $haystack .= " $meta_description" x $description_x if (defined $meta_description);
        $haystack .= " $keywords" x $keywords_x;
        $haystack .= " $baseurl$FILE" x $name_x;
        # Now that we're done with the special HTML tags, strip HTML
        # tags so that they aren't used in the search:
        $haystack =~ s/<[^>]*>//g;

        my $verdict = '';
        if ($check)
        {
            # Optional terms: any hit lists the file.
            foreach my $stored (@optional)
            {
                my $regex = $regex_for{$stored};
                my $hits = () = $haystack =~ /$regex/g;
                $verdict = 'yes' if ($hits);
                $relevance{$FILE} += $hits;
            }
            # Required terms: every one of them must be present.
            foreach my $stored (@required)
            {
                my $regex = $regex_for{$stored};
                my $hits = () = $haystack =~ /$regex/g;
                if ($hits)
                {
                    $verdict = 'yes';
                    $relevance{$FILE} += $hits;
                }
                else
                {
                    $verdict = 'no';
                    last;
                }
            }
            # Forbidden terms: a single hit excludes the file.
            foreach my $stored (@forbidden)
            {
                my $regex = $regex_for{$stored};
                if ($haystack =~ /$regex/)
                {
                    $verdict = 'no';
                    last;
                }
            }
        }
        else
        {
            # Allow for wildcard-triggered listing:
            $verdict = 'yes';
        }
        $include{$FILE} = $verdict ? $verdict : 'no';

        # Format for relevance. return_html sorts the keys of %HITS as
        # strings, so the score is zero-padded to a fixed width to keep
        # that order numeric however many hits a file collects.
        if ($verdict eq 'yes')
        {
            my $score = sprintf("%012.3f", $relevance{$FILE} / 1000);
            $HITS{"$score$FILE"} = $FILE;
            $hitcount++;
        }
    } # End loop through all files.
} # End search procedure.
sub return_html
{
    # Appends a summary of the search to the log file $summary_file and
    # prints the result page on STDOUT.
    #
    # Reads $hitcount, %HITS, %titles, %description, @optional,
    # @required, @forbidden, $basedir, $baseurl and $summary_file.
    # Terms stored as '\Wterm\W' are shown plain; terms stored without
    # that wrapper (substring terms) are shown in italics.
    #
    # Everything that originates from the visitor (the search terms, the
    # host name) is HTML-escaped before it is written to the page or to
    # the log. Titles and descriptions come from the site's own pages
    # and are printed unchanged.

    # Escapes the characters that are special in HTML.
    my $escape = sub {
        my ($text) = @_;
        $text = '' unless (defined $text);
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return $text;
    };
    # Turns a list of stored terms into a comma-separated HTML string.
    my $render_terms = sub {
        my @shown;
        foreach my $stored (@_)
        {
            my $text = $stored;
            my $whole_word = ($text =~ /^\\W/);
            $text =~ s/\\W//g;
            $text = $escape->($text);
            $text = "<I>$text</I>" unless ($whole_word);
            push(@shown, $text);
        }
        return join(', ', @shown);
    };

    # First we build a summary for the webmaster and the visitor:
    $docstring = "$hitcount opskrifter";
    $docstring = "en opskrift" if ($hitcount == 1);
    $docstring = "ingen opskrifter" unless ($hitcount);
    $summary = "<H2><TT>Resultat: $docstring fundet</TT></H2>\n";
    $summary .= "<BLOCKQUOTE>\n<PRE>\n";
    $summary .= " Indtastet ord: " . $render_terms->(@optional) if (@optional);
    $summary .= "\n Skal indeholde: " . $render_terms->(@required) if (@required);
    $summary .= "\n Skal ikke indeholde: " . $render_terms->(@forbidden) if (@forbidden);
    $summary .= "\n</PRE></BLOCKQUOTE>\n";

    # Logging is best effort: a missing or read-only log file must not
    # keep the visitor from seeing the results.
    my $log;
    if (open($log, '>>', $summary_file))
    {
        my $host = $escape->($ENV{'REMOTE_HOST'} || $ENV{'REMOTE_ADDR'} || '');
        print $log "Search by $host:<BR>\n";
        print $log $summary;
        close($log);
    }

    # Now that the webmaster knows what's going on, we print the
    # results for the visitor:
    print "Content-type: text/html\n\n";
    print <<EOM;
<HTML>
<HEAD><TITLE></TITLE><base target="4"></HEAD>
<body background="http://weblight.dk/opskriften/images/baggrunde/visningbag.gif" bgcolor="#FFFFFF" topmargin="0" link="#000000" vlink="#000000" alink="#FF0000">
<table border="0" width="420" cellspacing="0" cellpadding="0">
<tr>
<td>
$summary
<DL>
EOM
    if ($hitcount > 0)
    {
        # The keys of %HITS start with the relevance score, so a
        # reversed string sort lists the best matches first.
        foreach my $key (reverse sort keys %HITS)
        {
            my $file = $HITS{$key};
            my $size = -s "$basedir$file";
            if ($size > 1500)
            {
                $size = int($size/1000) . " K";
            }
            else
            {
                $size = "$size bytes";
            }
            my $last = Last_Modified("$basedir$file");
            print "<P><DT><a href=\"$baseurl$file\"><STRONG>$titles{$file}</STRONG></a></DT>\n";
            print "<DD>$description{$file}<BR>\n";
            print "<CITE><A HREF=\"$baseurl$file\">$baseurl$file</A><FONT SIZE=-1>";
            print " - $size - $last</FONT></CITE></DD>\n";
        }
    }
    else
    {
        print <<EOM;
Der er ingen opskrifter der indeholder det indtastede ord!
EOM
    }
    print <<EOM;
</DL></td></tr></table>
</BODY></HTML>
EOM
}
# Based on a snippet written by Jeff Carnahan of Terminal Productions
# (www.terminalp.com).
#
# Returns the modification date of the file named by the first argument
# as "day month year" in local time, with a Danish month name and a
# four-digit year, e.g. "15 Juni 2001". Returns an empty string when
# the file cannot be examined.
sub Last_Modified
{
    my ($path) = @_;
    my $mtime = (stat($path))[9];
    return '' unless (defined $mtime);
    my ($mday, $mon, $year) = (localtime($mtime))[3, 4, 5];
    my @months = qw(Januar Februar Marts April Maj Juni Juli August
                    September Oktober November December);
    # localtime reports the year as an offset from 1900.
    return sprintf('%d %s %d', $mday, $months[$mon], $year + 1900);
}
sub bad_base
{
    # Tells the browser that the configured base directory ($basedir)
    # could not be found, then ends the script.
    my @page = (
        "Content-type: text/html\n\n",
        "I tried to find the base directory you specified:\n",
        "<BLOCKQUOTE><PRE>$basedir</PRE></BLOCKQUOTE>\n",
        "But the system told me that it did not exist.\n",
    );
    print join('', @page);
    exit;
}
*******************
Håber dette hjælper lidt
/Steen