#!/usr/bin/perl
#
# Copyright (c) Michel Klein 2006
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
#
# Any questions relating to this software should be directed to:
# michel.klein@cs.vu.nl
#
# CGI script: downloads the BibTeX file named by the `url' parameter,
# parses it with Text::BibTeX and prints one description per entry,
# followed by descriptions of every person and organization seen.
#
# CGI parameters:
#   url       location of the BibTeX file (http, https or ftp)
#   relative  'on' => identify resources with rdf:ID instead of rdf:about
#   nolabel   'on' => do not print the human-readable summary per entry
#
# NOTE(review): in the copy of this file that was reviewed, the markup
# inside many output strings (anything that looked like an XML/HTML tag)
# is missing.  Those strings are reproduced as seen and each is marked
# "markup lost" below; restore them from a pristine copy before
# deploying.  Values that are computed but not printed next to such a
# marker ($id, $listid, ...) belong to the missing markup.

use strict;
use warnings;

use Text::BibTeX qw(:macrosubs :metatypes);
use Digest::SHA1 qw(sha1 sha1_hex);
use LWP::Simple;
use CGI;
use File::Temp qw(tempfile);

my $home = "http://www.cs.vu.nl/~mcaklein/bib2rdf/";

# Month macros.  The names must be quoted: a bareword `oct' is parsed as
# Perl's builtin oct() and would register the macro under the wrong name.
my @month_macros = (
    [ 'jan', "January" ],   [ 'feb', "February" ], [ 'mar', "March" ],
    [ 'apr', "April" ],     [ 'may', "May" ],      [ 'jun', "June" ],
    [ 'jul', "July" ],      [ 'aug', "August" ],   [ 'sep', "September" ],
    [ 'oct', "October" ],   [ 'nov', "November" ], [ 'dec', "December" ],
);
for my $macro (@month_macros) {
    add_macro_text($macro->[0], $macro->[1]);
}

# BibTeX entry types whose class name is not simply the capitalized type.
my %tags = (
    "inbook",        "Inbook",
    "incollection",  "InCollection",
    "conference",    "InProceedings",
    "inproceedings", "InProceedings",
    "mastersthesis", "MasterThesis",
    "phdthesis",     "PhDThesis",
    "techreport",    "TechnicalReport",
    "unpublished",   "Unpublished",
);

# Fields whose TeX markup is left alone by the generic clean-up because
# they get field-specific \url handling further down.
my %keeps_tex = map { $_ => 1 } qw(url howpublished note);

# Registries filled by storeName(): identifier => XML-encoded name.
my %persons;
my %organizations;

# Per-entry values read by printLabel(); reset for every entry.
my @authors;
my ($title, $journal, $volume, $number,
    $booktitle, $publisher, $month, $year);
my %label_slot = (
    title     => \$title,     journal   => \$journal,
    volume    => \$volume,    number    => \$number,
    booktitle => \$booktitle, publisher => \$publisher,
    month     => \$month,     year      => \$year,
);

my $query = CGI->new;

my $url = $query->param('url');
$url = '' unless defined $url;

my $relative   = $query->param('relative');
my $nolabel    = $query->param('nolabel');
my $abs        = !(defined $relative && $relative eq 'on');
my $want_label = !(defined $nolabel  && $nolabel  eq 'on');

if ($url eq "") {
    print $query->redirect($home);
    exit;
}

# Only fetch remote resources: LWP would otherwise happily resolve
# file: URLs and expose local files.  get() returns undef on failure,
# so the result has to be tested with defined().
my $content;
if ($url =~ m{\A(?:https?|ftp)://}i) {
    $content = get($url);
}
if (!defined $content) {
    print "Content-type: text/html\n\n";
    # markup lost (presumably an HTML page skeleton).  The URL is
    # user input and is escaped before being echoed.
    print "\n\nError\n\nCannot find " . encodeXML($url) . ".";
    exit;
}

# The URL as it may be embedded in a double-quoted XML attribute.
my $url_xml = encodeXML($url);
$url_xml =~ s/"/&quot;/g;

# Text::BibTeX reads from a file, so spool the download to a private,
# unpredictably named temporary file (removed again at exit).
my ($bib_fh, $fn) = tempfile(
    'bib2rdf_XXXXXX',
    DIR    => 'tmp',
    SUFFIX => '.bib',
    UNLINK => 1,
);
$content =~ s/\r//g;
# A line ending in "and" gets a trailing blank; /m is required because
# the whole download is a single string.
$content =~ s/and$/and /mg;
print {$bib_fh} $content or die "Cannot write $fn: $!";
close($bib_fh) or die "Cannot close $fn: $!";

print "Content-type: text/xml; charset=utf-8\n\n";
# markup lost: the text of this here-document (presumably the XML
# declaration and the opening root element) is missing.
print <<EOH;
EOH

# Read bibtex from file
my $bibfile = Text::BibTeX::File->new($fn)
    or die "Cannot open $fn: $!";

while (my $entry = Text::BibTeX::Entry->new($bibfile)) {
    next unless $entry->parse_ok;
    next unless $entry->metatype == BTE_REGULAR;

    my @fields = $entry->fieldlist();
    my $type   = field2tag($entry->type());
    my $key    = $entry->key();
    $key =~ s/:/_/g;

    if ($abs) {
        print "<$type rdf:about=\"$url_xml#$key\">\n";
    }
    else {
        print "<$type rdf:ID=\"$key\">\n";
    }
    print " \n";    # markup lost

    @authors = ();
    ($title, $journal, $volume, $number,
     $booktitle, $publisher, $month, $year) = ();

    for my $field (@fields) {
        my $value = $entry->get($field);
        next if !defined $value || $value eq "";

        $value = deTex($value) unless $keeps_tex{$field};

        # Clean-up tex in url-field
        if ($field eq 'url') {
            $value =~ s/\\~\{\}/~/g;
            $value =~ s/\"//g;
            if ($value =~ /\\url/) {
                $value =~ s/\\url\{//g;
                $value =~ s/\}$//g;
            }
        }

        # Cleanup month-field
        if ($field eq 'month') {
            if ($value !~ /[a-zA-Z]/) {
                # Historic workaround for a missing "October".
                # NOTE(review): presumably caused by the unquoted `oct'
                # macro name (now quoted above); kept so values without
                # letters render as before -- consider removing.
                $value = "October $value";
            }
            else {
                # Add space between month and day
                $value =~ s/(\D+)(\d.*)/$1 $2/g;
            }
        }

        # storeName() does its own XML encoding, so keep the unencoded
        # text for it; passing the encoded value would encode it twice.
        my $plain = $value;

        # Encode XML specific characters
        $value = encodeXML($value);

        # Turn \url{} into HTML url's for abstracts and notes.
        # NOTE(review): only the closing </a> was still visible; the
        # opening tag is reconstructed as a plain href link -- confirm.
        if ($field eq 'note' || $field eq 'abstract') {
            $value =~ s/\\url\{([^}]*)\}/<a href="$1">$1<\/a>/g;
        }

        # Remember the values printLabel() summarizes
        if ($want_label && exists $label_slot{$field}) {
            ${ $label_slot{$field} } = $value;
        }

        if ($field eq 'author' || $field eq 'editor') {
            # Parse individual names in author and editor fields
            my @names = $entry->split($field);
            @authors = @names if $field eq 'author';

            # Print individual author/editor ID's and store their ID
            # and fullname
            my @ids;
            for my $name (@names) {
                $name =~ s/^\s+|\s+$//g;    # trim both ends
                $name = deTex($name);
                my $id = "#" . storeName('person', $name);
                push @ids, $id;
                $id = "$url_xml$id" if $abs;
                print " \n";    # markup lost
            }

            # Print ordered list of author/editor ID's
            my $listid = sha1_hex($url . $field . $key);
            print " \n";    # markup lost
            print " \n";    # markup lost
            for my $id (@ids) {
                $id = "$url_xml$id" if $abs;
                print " \n";    # markup lost
            }
            print " \n";    # markup lost
            print " \n";    # markup lost
        }
        elsif ($field eq 'organization'
            || $field eq 'publisher'
            || $field eq 'institution')
        {
            my $id = "#" . storeName('organization', $plain);
            $id = "$url_xml$id" if $abs;
            print " \n";    # markup lost
        }
        elsif ($field eq 'abstract' || $field eq 'note') {
            print " $value\n";    # markup lost
        }
        elsif ($field eq 'url') {
            print " \n";    # markup lost
        }
        elsif ($field eq 'howpublished') {
            if ($value =~ /\\url\{(.*)\}/) {
                print " \n";    # markup lost
            }
            else {
                $value = deTex($value);
                print " $value\n";    # markup lost
            }
        }
        else {
            print " $value\n";    # markup lost
        }
    }

    printLabel() if $want_label;
    print "\n\n";    # markup lost
}

# Print RDF descriptions for all persons
while (my ($id, $name) = each %persons) {
    if ($abs) {
        print "\n";    # markup lost
    }
    else {
        print "\n";    # markup lost
    }
    print " \n";       # markup lost
    #if ($label) { print " $name\n"; }

    # Stored names are XML-encoded, so ignore the ';' that terminates
    # character/entity references when looking for concatenated names.
    my $probe = $name;
    $probe =~ s/&(?:#\d+|amp|lt|gt);//g;
    if ($probe =~ /,.+,/ || $probe =~ /;/) {
        $name = "CONCATENATED-AUTHOR-NAMES";    # ($name)";
    }
    print " $name\n\n\n";    # markup lost
}

# Print RDF descriptions for all organisations
while (my ($id, $name) = each %organizations) {
    if ($abs) {
        print "\n";    # markup lost
    }
    else {
        print "\n";    # markup lost
    }
    #if ($label) { print " $name\n"; }
    print " $name\n\n\n";    # markup lost
}

print "\n";    # markup lost
unlink($fn);

# Subroutine to make all data PCDATA.
#   $_[0]   text to encode (undef is treated as the empty string)
# Returns the text with &, < and > replaced by entities and every
# character outside \x20-\x7F replaced by a decimal character reference.
# References to control characters that XML 1.0 forbids (everything
# below 32 except tab, line feed and carriage return) are dropped.
sub encodeXML {
    my ($text) = @_;
    $text = '' unless defined $text;

    # XML encodings; '&' first so the entities added here stay intact
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    # everything non-ASCII (and every control character) becomes &#N;
    $text =~ s/([^\x20-\x7F])/'&#' . ord($1) . ';'/gse;

    # 0-8, 11, 12 and 14-31 are not allowed in XML 1.0 at all
    $text =~ s/&#(?:[0-8]|1[124-9]|2[0-9]|3[01]);//g;

    return $text;
}

# Subroutine to remove TeX specific characters.
#   $_[0]   text to clean
# Returns the text with ~ turned into a blank, accent commands (\' \"),
# backslashes, braces and a trailing comma removed, -- collapsed to -
# and `` / '' turned into a double quote.
# NOTE(review): the reviewed copy had one more substitution here whose
# pattern and replacement were both empty (text lost).  It is left out:
# an empty pattern makes Perl re-apply the last successful match.
sub deTex {
    my ($text) = @_;
    $text = '' unless defined $text;

    $text =~ s/~/ /g;
    $text =~ s/(?:\\'|\\"|\\|\{|\}|,$)//g;
    $text =~ s/--/-/g;
    $text =~ s/(?:``|'')/"/g;

    return $text;
}

# Subroutine to create stack of persons / individuals.
#   $_[0]   'person' or 'organization' (anything else stores nothing)
#   $_[1]   plain, not yet XML-encoded name
# Records the XML-encoded name under a key derived from the name and
# returns that key, which can be used as identifier in RDF.
sub storeName {
    my ($kind, $name) = @_;
    $name = '' unless defined $name;

    my $mkey = $name;
    $name = encodeXML($name);

    $mkey =~ s/&/and/g;
    $mkey =~ s/\s+/_/g;
    $mkey =~ s/[:\/]/_/g;
    $mkey =~ s/[;&,.#]//g;
    $mkey =~ tr/A-Z/a-z/;
    $mkey =~ s/[^\x20-\x7F]//g;

    if ($kind eq 'person') {
        $persons{$mkey} = $name;
    }
    elsif ($kind eq 'organization') {
        $organizations{$mkey} = $name;
    }

    return $mkey;
}

# Subroutine to translate the entry type of a bibtex entry into the
# qualified class name used for the publication.
#   $_[0]   entry type as reported by Text::BibTeX
# Returns "ow:" followed by the mapped name from %tags, or by the type
# with its first letter upper-cased when there is no mapping.
sub field2tag {
    # this should be generalized to allow arbitrary translations
    my ($bibtype) = @_;

    my $tag = $tags{$bibtype};
    if (!defined $tag || $tag eq "") {
        $tag = "\u$bibtype";
    }

    return "ow:$tag";
}

# Subroutine to create an RDF label with a readable summary of the
# bibtex entry.  Takes no arguments; reads @authors, $title, $journal,
# $volume, $number, $booktitle, $publisher, $month and $year as set for
# the current entry and prints one line.  The summary is built in a
# private variable so the labelling switch is never overwritten.
sub printLabel {
    my $text = '';

    if (@authors) {
        $text = join(', ', map { encodeXML(deTex($_)) } @authors) . '. ';
    }

    $text .= $title      if $title;
    $text .= ". $journal" if $journal;
    if ($volume) {
        $text .= " $volume";
        $text .= "($number)" if $number;
    }
    $text .= ". In: $booktitle"  if $booktitle;
    $text .= ", number $number" if $number && !$volume;
    $text .= ", $publisher"     if $publisher;
    $text .= ", $month"         if $month;
    $text .= ", $year"          if $year;

    print " $text\n";    # markup lost
    return;
}