#!/usr/bin/perl
#
# Script to convert a csv representation (plain text file with ';' as field
# delimiter) of the UNSPSC codelist to an RDF Schema or DAML+OIL ontology. A csv
# representation can be created in Microsoft Excel by "Save as" "CSV (Comma
# delimited)"
# The public version of the UNSPSC Code can be obtained from:
# http://www.eccma.org/downloads.php3
#
# Michel Klein
# $Id: convert.pl,v 1.4 2001/12/21 14:57:22 mcaklein Exp $
#
# NOTE(review): the copy of this script that was reviewed had been collapsed
# onto three lines and had every "<...>" span removed (heredoc openers, the
# <FILE> reads and all XML markup inside the print statements).  The control
# flow below follows the surviving text; every literal marked "reconstructed"
# is a best guess and must be checked against the original revision before
# the generated output is relied upon.

use strict;
use warnings;
use Getopt::Std;

# Getopt::Std stores each switch in the package variable $opt_<letter>.
our ($opt_d, $opt_e, $opt_m, $opt_h);

# Read options and print help.  All four switches are plain flags; declaring
# -h as "h:" would make it swallow the file name as its argument.
getopts('demh');
if ((@ARGV != 1) || ($opt_h)) {
    # NOTE(review): the "Usage:" line is reconstructed; the rest of the help
    # text survived intact.
    print STDERR <<EOT;
Usage: $0 [options] <csv-file>

Converts a csv representation (plain text file with ';' as field delimiter)
of the UNSPSC codelist to an RDF Schema or DAML+OIL ontology. Prints to
standard output.

Valid options are:
  -d  use DAML+OIL syntax instead of RDF Schema syntax
  -m  use the member version of UNSPSC Code; default the public version
      is used.
  -e  use the EGCI code as identifier; default the "UNSPSC title" is used
      as rdf:ID (only valid if combined with 'm')
  -h  print this help message and exit
EOT
    exit;
}
if ($opt_e && !$opt_m) {
    print STDERR "Option -e ignored\n";
}

# Define namespace to use as UNSPSC meta schema definition
my $unns = "http://ontoview.org/schema/unspsc/1#";

# Open file and print header.  Three-argument open keeps a file name that
# starts with '<', '>' or '|' from being interpreted as an open mode.
open(my $fh, '<', $ARGV[0]) or die "Error: cannot open: $!";

# Namespace prefix used for the generated Class elements ("daml" or "rdfs").
my $ns;

# NOTE(review): XML declaration, the rdf:RDF start tag with its rdf/rdfs/daml
# namespace declarations, the daml:Ontology wrapper and the RDFS comment are
# reconstructed.  Only the xmlns:unspsc attribute, the "DAML+OIL
# representation ..." text and the $ns assignments were visible.
print "<?xml version=\"1.0\"?>\n";
print "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n";
print " xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\"\n";
if ($opt_d) {
    print " xmlns:daml=\"http://www.daml.org/2001/03/daml+oil#\"\n";
    print " xmlns:unspsc=\"".$unns."\">\n\n";
    print <<EOT;
<daml:Ontology rdf:about="">
 <rdfs:comment>
  DAML+OIL representation of the UNSPSC code definition.
  See http://www.eccma.org/
 </rdfs:comment>
</daml:Ontology>
EOT
    $ns = "daml";
}
else {
    print " xmlns:unspsc=\"".$unns."\">\n\n";
    print "<!-- RDF Schema representation of the UNSPSC code definition. -->\n";
    $ns = "rdfs";
}

# Read first line of csv file (headers) to check the variant
my $header = <$fh>;
$header = '' unless defined $header;    # empty file: fall through to the error
if ($opt_m) {
    if (index($header, 'EGCI') != 0) {
        print STDERR "Not a member version!\n";
        exit;
    }
}
else {
    if (index($header, 'Segment') != 0) {
        print STDERR "Not a public version!\n";
        exit;
    }
}

# Table to look up the identifier that belongs to a UNSPSC code.  A parent
# can only be resolved if its row precedes its children in the input.
my %id_of;

# Read all subsequent lines, one record at a time
while (my $line = <$fh>) {
    $line =~ s/\r?\n\z//;               # drop LF or CRLF line ending
    next unless $line =~ /\S/;          # ignore blank lines

    # Split the line at the ';' symbol
    my ($egci, $segment, $family, $class, $commodity, $bti, $title,
        $add_version, $add_date);
    if ($opt_m) {
        ($egci, $segment, $family, $class, $commodity, $title,
            $add_version, $add_date) = split(/\s*\;\s*/, $line);
    }
    else {
        ($segment, $family, $class, $commodity, $bti, $title,
            $add_version, $add_date) = split(/\s*\;\s*/, $line);
    }

    # Short rows leave trailing fields undefined; treat them as empty.
    for ($egci, $segment, $family, $class, $commodity, $title) {
        $_ = '' unless defined $_;
    }

    # Prepend '0' if necessary
    for ($segment, $family, $class, $commodity) {
        $_ = "0$_" if length($_) == 1;
    }

    # Construct UNSPSC code
    my $code = "$segment.$family.$class.$commodity";

    my $id;
    if ($opt_e && $opt_m) {
        # Use EGCI code as identifier
        $id = $egci;
    }
    else {
        # Use title as rdf:ID and replace spaces by underscores
        $title =~ s/ /_/g;
        $id = $title;
    }

    # Remember which identifier belongs to this code
    $id_of{$code} = $id;

    # Calculate the UNSPSC code of the superclass: the most specific
    # non-zero level decides the type, and the parent has that level zeroed.
    my ($supercode, $type);
    {
        # The level fields are compared as numbers, as in the original;
        # a non-numeric field counts as 0 without a warning.
        no warnings 'numeric';
        if ($commodity != 0) {
            $supercode = "$segment.$family.$class.00";
            $type      = "Commodity";
        }
        elsif ($class != 0) {
            $supercode = "$segment.$family.00.00";
            $type      = "Class";
        }
        elsif ($family != 0) {
            $supercode = "$segment.00.00.00";
            $type      = "Family";
        }
        else {
            $supercode = "00.00.00.00";
            $type      = "Segment";
        }
    }

    # Lookup the ID of superclass in table.  The codes are dotted strings,
    # so they have to be compared with 'eq'; '==' would only look at the
    # leading numeric part.
    my $superclass;
    if ($supercode eq "00.00.00.00") {
        #$superclass = "Thing"
        $superclass = "http://www.w3.org/2000/01/rdf-schema#Resource";
    }
    else {
        my $parent_id = $id_of{$supercode};
        if (!defined $parent_id) {
            print STDERR "Warning: no entry $supercode found as parent of $code\n";
            $parent_id = '';
        }
        $superclass = "\#".$parent_id;
    }

    # NOTE(review): only the Class start tag survived.  The subClassOf and
    # type elements, the unspsc:* property names and the end tag are
    # reconstructed from the variables that the original computed.
    print "\n<${ns}:Class rdf:ID=\"$id\">\n";
    print " <rdfs:subClassOf rdf:resource=\"$superclass\"/>\n";
    print " <rdf:type rdf:resource=\"$unns$type\"/>\n";
    if ($opt_e && $opt_m) {
        # The EGCI code is the ID, so carry the title as a property.
        print " <unspsc:title>$title</unspsc:title>\n";
    }
    elsif ($opt_m) {
        # The title is the ID, so carry the EGCI code as a property.
        print " <unspsc:egci>$egci</unspsc:egci>\n";
    }
    print " <unspsc:code>$code</unspsc:code>\n";
    #print " <unspsc:add_date>$add_date</unspsc:add_date>\n";
    print "</${ns}:Class>\n";
}
close($fh);

# NOTE(review): closing tag reconstructed.
print "\n</rdf:RDF>\n";