1 #!/usr/bin/perl -wT
2
3 # xml-fetch.pl - query tags in an HTML or XML file
4 # by Jonathan Eisenzopf. v1.0 990214
5 # Copyright (c) 2012 quinstreet.com LLC. All Rights Reserved.
6 # Originally published and documented at https://www.webreference.com
7 # You may use this code on a Web site only if this entire
8 # copyright notice appears unchanged and you publicly display
9 # on the Web site a link to https://www.webreference.com/perl/.
10 #
11 # Contact eisen@quinstreet.com for all other uses.
12
13 use strict;
14 use CGI;
15 use HTTP::Request;
16 use LWP::UserAgent;
17
# Working variables used by the tag-extraction loop further down. They are
# declared at file scope and their names are relied on by that later code.
my ($key, $attr, %h_attr, $text, @entities, $content);
my $entity = "";

# Parse the incoming CGI request and emit the HTTP header first, so that
# everything printed afterwards (including any error output) follows it.
# Direct method-call syntax is used because the indirect form ("new CGI")
# can be parsed ambiguously.
my $query = CGI->new;
print $query->header;

# Both form fields are required: 'url' is the document to fetch and
# 'fields' is the comma-separated list of tag names to look for.
# NOTE(review): printError is defined outside this section; presumably it
# ends the request -- confirm, because otherwise execution falls through.
printError("You must complete both fields!")
    unless ($query->param('url') && $query->param('fields'));
26
# Build the HTTP client used to download the requested document.
my $ua = LWP::UserAgent->new;
$ua->agent("xml-fetch/1.0");

# Cap the response body at roughly 1 MB. max_size() expects a plain byte
# count; passing an array reference does not give a usable limit.
$ua->max_size(1_000_000);

# The URL comes straight from the submitted form, so only web schemes are
# permitted; otherwise a file:// (or similar) URL could be used to read
# resources that are local to the server.
$ua->protocols_allowed([ 'http', 'https' ]);

# param() is forced into scalar context: in list context it returns every
# value submitted under the name, and any extra values would be handed to
# HTTP::Request->new as additional constructor arguments.
my $request  = HTTP::Request->new(GET => scalar $query->param('url'));
my $response = $ua->request($request);

# Anything other than a plain 200 is reported as a retrieval failure,
# prefixed with the status code that came back.
printError($response->code . ": Error retrieving URL " . scalar $query->param('url'))
    unless ($response->code == 200);
36
# Turn the comma-separated 'fields' value into a list of tag names.
# Surrounding whitespace is stripped so "title, link" works, and empty names
# are dropped because an empty name would make the pattern below match
# every tag in the document.
@entities = ();
foreach my $name (split(/,/, scalar $query->param('fields'))) {
    $name =~ s/^\s+//;
    $name =~ s/\s+$//;
    push(@entities, $name) if length $name;
}
$content = $response->content;

Print_Header();
foreach $entity (@entities) {
    Print_Entity_Head($entity);

    # Scan the document for every occurrence of the tag (case-insensitive,
    # bodies may span lines).
    #   \Q...\E      keeps metacharacters in the user-supplied name literal
    #   (?=[\s/>])   requires the name to end here, so "item" does not
    #                also match "<items ...>"
    #   ([^>]*?)     $1: raw attribute text, confined to the opening tag
    #   (.*?)        $2: element body; undefined for the "<tag ... />" form
    while ($content =~ m{<\Q$entity\E(?=[\s/>])\s*([^>]*?)(?:/>|>(.*?)</\Q$entity\E>)}gsi) {
        # Copy the captures immediately, before any other pattern runs.
        my $attr_text = $1;
        my $body      = $2;

        # Reset per-match state. A defined test (not a truth test) keeps a
        # body such as "0" from being discarded.
        $text   = defined $body ? $body : "";
        %h_attr = ();

        # Attributes are separated at the closing double quote of each
        # value. Single-quoted values are not split apart by this.
        foreach $attr (split(/"\s+/, $attr_text)) {
            next unless length $attr;

            # Limit of 2 keeps any '=' inside the value (e.g. query strings).
            my ($attr_name, $value) = split(/=/, $attr, 2);

            # Attributes written without a value get an empty string.
            $value = "" unless defined $value;

            # Trim around the '=' first, then remove the quote characters,
            # so whitespace inside the quoted value is preserved.
            for ($attr_name, $value) {
                s/^\s+//;
                s/\s+$//;
            }
            $value =~ s/"//g;

            next unless length $attr_name;
            $h_attr{$attr_name} = $value;
        }

        Print_Element(\%h_attr, $text);
    }
}
56
57 print "