#!/usr/bin/perl -w -CSDA
#
# snJxml2xslt2joomla - Fetch XML feed through XSLT to Joomla! DB
#
# Copyright (c) 2008-2009 EPIPE Communications
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# See <http://www.gnu.org/licenses/> for license terms.
#
#
# Version: 1.1
#
# Version history:
#
# - 1.1: changed the license to GPLv3 due to stupid new
# JED requirements (2009-03-27)
#
# - 1.0: initial release (2008-11-16)
#
# Download location:
#
# http://dist.epipe.com/joomla/perl/
#
# Author contact information:
#
# info # epipe.com
# http://epipe.com/
#
# Description:
#
# snJxml2xslt2joomla fetches an XML feed (such as RSS, Atom, sitemap.xml
# or any other XML data) and processes it through XSL transformation
# (XSLT) and saves the result to an article or module entry in the
# Joomla! database.
#
# Requirements:
#
# - perl
#
# - shell access to the Joomla! server
#
# - CMS::Joomla perl module (available from CPAN)
#
# - some common perl modules (available from CPAN) for database
# access and XML manipulation, see the "use" statements at the
# beginning of the code
#
# Configuration:
#
# You can freely define your output in the XSL file. It has to match
# the incoming XML feed.
#
# Usage:
#
# snJxml2xslt2joomla /joomla/configuration.php ID \
# /some/file.xsl http://www.example.org/feed.xml
#
# ID should be an ID number of an existing article in the Joomla!
# database.
#
# If you wish to update module contents, prefix the ID number with
# "m" and you will be updating the contents of the corresponding
# module. This mostly makes sense with the custom HTML module.
#
# Please make sure to use the correct number, because old existing
# contents will get overwritten.
#
# Example:
#
# snJxml2xslt2joomla /your/joomla/configuration.php 555 \
# snJxml2xslt2joomla-example.xsl http://feeds.joomla.org/JoomlaExtensions
#
# The example XSL file is available at: http://dist.epipe.com/joomla/perl/
#
# This example takes a list of recent Joomla! extensions, applies an
# XSL transformation to it to produce HTML and updates the
# contents of the pre-existing article number 555 with the output.
#
# Limitations:
#
# It is not known how well this script works with Unicode UTF-8
# characters (which I suppose is the Joomla! database format)
# outside of the standard US-ASCII subset used by English language
# websites. The author hopes that the perl -CSDA options at the
# first line make this script automagically fully UTF-8 compliant.
# That is probably not the case.
#
use strict;
use XML::LibXML;
use XML::LibXSLT;
use LWP::UserAgent;
use DBI;
use CMS::Joomla;
#
# process_feed($feed_url, $xslt_file)
#
# Fetches the XML document at $feed_url over HTTP, parses the XSL
# stylesheet stored in the local file $xslt_file, applies the stylesheet
# to the feed and returns the serialized result of the transformation.
#
# Dies with a message naming the feed URL or the stylesheet file when
# the download, either parse step or the transformation fails.
#
# NOTE(review): whether the returned scalar holds bytes or characters is
# decided by XML::LibXSLT's output_string() -- confirm against its
# documentation before relying on either.
sub process_feed ($$) {
    my ($feed_url, $xslt_file) = @_;

    # Run one XML/XSLT step. The libxml based modules report failures by
    # throwing, so the exception is caught here and re-raised together
    # with the name of the input that caused it; an undefined result
    # without an exception is treated as a failure as well.
    my $step = sub {
        my ($failure, $code) = @_;
        my $value = eval { $code->() };
        return $value if defined($value);
        my $reason = $@;
        die $failure . (length($reason) ? ": $reason" : '');
    };

    my $ua       = LWP::UserAgent->new;
    my $feed_res = $ua->request(HTTP::Request->new(GET => $feed_url));
    die "can not retrieve $feed_url" unless defined($feed_res);
    die "can not retrieve $feed_url (" . $feed_res->status_line . ")"
        unless $feed_res->is_success;

    my $xml_parser  = XML::LibXML->new();
    my $xslt_engine = XML::LibXSLT->new();

    # The raw response body is handed to the parser on purpose: the XML
    # parser works out the character encoding of the document itself.
    my $feed_xml = $step->("can not parse $feed_url",
        sub { $xml_parser->parse_string($feed_res->content) });

    my $xslt_xml = $step->("can not parse $xslt_file",
        sub { $xml_parser->parse_file($xslt_file) });

    my $stylesheet = $step->("can not parse $xslt_file",
        sub { $xslt_engine->parse_stylesheet($xslt_xml) });

    my $output = $step->("can not transform $feed_url",
        sub { $stylesheet->transform($feed_xml) });

    return $stylesheet->output_string($output);
}
#
# update_joomla($config_file, $id, $output)
#
# Stores $output in the Joomla! database described by the configuration
# file $config_file.
#
# $id selects the row to overwrite:
#   "555" or "a555" - introtext of the article with id 555 (the
#                     "modified" column is set to NOW() as well)
#   "m555"          - content of the module with id 555
#
# Returns 1 on success. Dies when the configuration can not be read,
# when $id is malformed, when the database connection can not be opened
# or when the UPDATE statement fails.
sub update_joomla ($$$) {
    my ($config_file, $id, $output) = @_;

    my $joomla = CMS::Joomla->new($config_file);
    die "can not parse Joomla configuration" unless defined($joomla);

    # Validate the id before opening a database connection. The pattern
    # is anchored with \A and \z and limited to ASCII digits, so that a
    # trailing newline or non-ASCII digit characters are rejected.
    my ($kind, $row_id);
    if ($id =~ /\Aa?([0-9]+)\z/) {
        ($kind, $row_id) = ('article', $1);
    }
    elsif ($id =~ /\Am([0-9]+)\z/) {
        ($kind, $row_id) = ('module', $1);
    }
    else {
        die "invalid id $id";
    }

    my $dbh = $joomla->dbhandle( { RaiseError => 1, AutoCommit => 1 } );
    die "can not connect to Joomla database" unless defined($dbh);

    # Only the table prefix is concatenated into the statement; the new
    # content and the row id are always passed as bind values.
    my $sql = $kind eq 'article'
        ? 'UPDATE ' . $joomla->dbprefix . 'content '
          . 'SET introtext = ?, modified = NOW() WHERE id = ?'
        : 'UPDATE ' . $joomla->dbprefix . 'modules '
          . 'SET content = ? WHERE id = ?';

    my $sth = $dbh->prepare($sql);
    $sth->execute($output, $row_id)
        or die "can not update $kind id $row_id in Joomla database";

    # The caller tests the result for truth, so report success
    # explicitly instead of leaking the value of the last statement.
    return 1;
}
# main()
#
# Expects exactly four command line arguments: the Joomla! configuration
# file, the target ID, the XSL file and the feed URL. Prints a usage
# message to STDERR and exits with status 2 otherwise.
unless (@ARGV == 4) {
    print STDERR "usage: $0 /joomla/configuration.php ID file.xsl "
        . "http://www.example.org/feed.xml\n";
    exit(2);
}
my ($config_file, $target_id, $xslt_file, $feed_url) = @ARGV;
# Transform the feed first; the database is only touched when that worked.
my ($output) = process_feed($feed_url, $xslt_file);
defined($output) or die "can not fetch or process $feed_url";
update_joomla($config_file, $target_id, $output)
    or die "can not update Joomla database";
# eof