#!/usr/bin/perl -w -CSDA
#
# snJmetadesc - meta description generator for Joomla! 1.5
#
# Copyright (c) 2008-2009 EPIPE Communications
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# See <http://www.gnu.org/licenses/> for license terms.
#
#
# Version: 1.4
#
# Version history:
#
# - 1.4: changed the license to GPLv3 due to stupid new
#        JED requirements (2009-01-14)
#
# - 1.3: changed to use CMS::Joomla module (2008-11-15)
#        added metadescprefix configurable
#
# - 1.2: documentation improvements (2008-09-20)
#
# - 1.1: documentation improvements (2008-08-21)
#
# - 1.0: initial release (2008-08-19)
#
# Download location:
#
#   http://dist.epipe.com/joomla/perl/
#
# Author contact information:
#
#   info # epipe.com
#   http://epipe.com/
#
# Description:
#
# snJmetadesc connects to a Joomla! 1.5 database and goes through all
# articles. It extracts the beginning of each article content, removes
# any markup and adds the resulting string as the meta description tag of
# that article if none exists previously or if the original meta
# description is also autogenerated by this script. When a meta
# description of an article is changed, the script outputs the article
# ID number followed by the new meta description. All articles are
# processed, no matter what their status is (published or not).
#
# The script uses a specific character sequence "... " (excluding the
# quotes but including the space) at the end of the meta description
# field to indicate that it is autogenerated by this script. You can
# also define your meta descriptions manually for some articles.
# In that case do NOT type a space character at the end of the field
# if you end it with three dots (otherwise the next run of this
# script would overwrite your manually written meta description).
# You can easily customize this character sequence by editing the
# script if you wish.
#
# Most Joomla! developers would call this a Search Engine Optimization
# (SEO) tool, but this is really just an automatic meta description
# generator. It is intended to be run manually after updating any
# articles or periodically from cron(8).
#
# This script also serves as an example of how Joomla! configuration
# parameters and database can be accessed and manipulated from a perl
# script. Those folks who think that PHP sucks but still wish or are
# forced to use Joomla! might appreciate this as a basis for their
# own perl based Joomla! tools.
#
# Requirements:
#
# - perl (recent version)
#
# - shell access to the Joomla! server
#
# - CMS::Joomla perl module (available from CPAN)
#
# - some common perl modules (available from CPAN) for database
#   access and HTML manipulation, see the "use" statements at the
#   beginning of the code
#
# Configuration:
#
# Have a look at the configuration variables at the beginning of the code
# and edit them according to your needs. By default (as distributed
# originally) this script is set to "dry run" mode (in other words
# it does not do anything but it outputs the meta descriptions that
# would have been created).
#
# If you wish to remove all autogenerated meta descriptions, just
# set maxmetadesclen to zero.
#
# Usage:
#
#   snJmetadesc /your/joomla/dir/configuration.php
#
# You can also list several configuration.php files. In that case
# all of the corresponding web sites will be processed.
#
# Limitations:
#
# This script creates meta descriptions only for articles in the Joomla!
# database. Those meta descriptions are displayed only on single article
# pages. Other types of pages will be still generated by Joomla!
# using the site global meta description (which Google does not like).
# If there is no global definition, then the meta description on those
# pages is left empty.
#
# It is not known how well this script works with Unicode UTF-8
# characters (which I suppose is the Joomla! database format)
# outside of the standard US-ASCII subset used by English language
# websites. The author hopes that the perl -CSDA options at the
# first line make this script automagically fully UTF-8 compliant.
# That is probably not the case.
#
# Safety Precautions:
#
# You should probably have basic perl, unix and database administration
# skills (as well as the ability to read English language) before
# running this script. You might want to take a backup of your
# Joomla! database or go to the emergency shelters or whatever your
# preferred safety precautions are.
#

use strict;
use warnings;
use DBI;
use HTML::Strip;
use HTML::Entities;
use CMS::Joomla;

# configurables:

my $maxmetadesclen = 250;  # maximum meta description length
my $minmetadesclen = 32;   # minimum meta description length
my $stripjoomla = 1;       # whether to strip {joomla} tags
my $cloackemail = 1;       # whether to do (really simple) e-mail cloaking
my $emitspaces = 1;        # whether to emit spaces in place of HTML tags
my $dryrun = 1;            # if set, does not actually update database
my $verbose = 1;           # if set, outputs the generated metadescs
my $metadescprefix = '';   # prefix string for generated meta descriptions

# end of configuration variables

#
# makemetadesc(text)
#
# Builds a plain-text meta description from an article's HTML text:
# optionally removes {...} tags, strips HTML markup, decodes entities,
# optionally removes e-mail-like tokens, collapses whitespace and
# truncates the result to at most $maxmetadesclen characters.
#
# Returns the description with HTML entities re-encoded, or undef when
# the text is undefined or the resulting description is shorter than
# $minmetadesclen characters.
#
sub makemetadesc {
    my ($text) = @_;

    return unless defined($text);

    # Braces are escaped: an unescaped literal "{" in a pattern is
    # deprecated/fatal on newer perls.
    $text =~ s/\{[^}\s]+\}/ /g if $stripjoomla;

    my $hs = HTML::Strip->new();
    $hs->set_emit_spaces($emitspaces);
    my $desc = $hs->parse($text);
    $hs->eof;
    return unless defined($desc);

    decode_entities($desc);     # decodes in place when called in void context
    $desc =~ s/\S+\@\S+//g if $cloackemail;
    $desc =~ s/\s+/ /g;
    $desc =~ s/^\s+//;

    if (length($desc) > $maxmetadesclen) {
        # Only drop the trailing word when the cut really falls inside
        # it; a word that ends exactly at the limit is kept whole.
        my $midword = substr($desc, $maxmetadesclen, 1) =~ /\w/;
        $desc = substr($desc, 0, $maxmetadesclen);
        $desc =~ s/\w+$// if $midword;
    }
    $desc =~ s/\s+$//;

    return if length($desc) < $minmetadesclen;
    return encode_entities($desc);
}

#
# gendescs(configfile)
#
# Processes one Joomla! site, identified by the path of its
# configuration.php file: reads id, introtext and metadesc of every row
# in the <dbprefix>content table and regenerates the meta description of
# each article whose current one is empty or ends with the autogenerated
# marker "... ".  Changed descriptions are printed when $verbose is set
# and written to the database unless $dryrun is set.
#
# Returns 1 on success and 0 on failure (configuration could not be
# loaded, no database handle, or a database error; in the last case the
# transaction is rolled back and the error is reported on STDERR).
#
sub gendescs {
    my ($configfile) = @_;

    my $joomla = CMS::Joomla->new($configfile);
    return 0 unless defined($joomla);

    my $dbh = $joomla->dbhandle( { RaiseError => 1, AutoCommit => 0 } );
    return 0 unless defined($dbh);

    my $table = $joomla->cfg->{'dbprefix'} . 'content';

    # RaiseError makes DBI die on any failure; catch that here so the
    # transaction can be rolled back and the remaining sites still run.
    my $ok = eval {
        my $select = $dbh->prepare(
            "SELECT id, introtext, metadesc FROM $table");
        $select->execute;

        my %updates;

        while (my ($id, $introtext, $metadesc) = $select->fetchrow_array) {
            # Leave manually written descriptions (non-blank and not
            # ending with the autogenerated marker) untouched.
            next if defined($metadesc) && $metadesc =~ /\S/ &&
                $metadesc !~ /\.\.\. $/;

            my $newmetadesc = makemetadesc($introtext);

            if (defined($newmetadesc)) {
                $newmetadesc = $metadescprefix . $newmetadesc . '... ';
            }
            else {
                # Nothing to generate and nothing stored: no change.
                next unless defined($metadesc);
                $newmetadesc = '';
            }

            next if defined($metadesc) && $newmetadesc eq $metadesc;

            $updates{$id} = $newmetadesc;
        }

        my $update;
        $update = $dbh->prepare(
            "UPDATE $table SET metadesc = ? WHERE id = ?") unless $dryrun;

        # Sorted by article id so the output order is reproducible.
        foreach my $id (sort { $a <=> $b } keys %updates) {
            my $data = $updates{$id};
            print "$id: $data\n" if $verbose;
            $update->execute($data, $id) unless $dryrun;
        }

        $dbh->commit;
        1;
    };

    unless ($ok) {
        my $err = $@ || "unknown error\n";  # save before the next eval
        eval { $dbh->rollback };
        eval { $dbh->disconnect };
        warn "snJmetadesc: database error while processing " .
            "$configfile: $err";
        return 0;
    }

    $dbh->disconnect;

    return 1;
}

# main()

unless (@ARGV) {
    print STDERR "usage: $0 /your/joomla/dir/configuration.php ...\n";
    exit 2;
}

my $failures = 0;

foreach my $configfile (@ARGV) {
    next if gendescs($configfile);
    warn "$0: failed to process $configfile\n";
    $failures++;
}

exit($failures ? 1 : 0);

# eof