Revision 107
- Date: 2011/08/02 22:34:17
- Files:
Legend:
- Added
- Removed
- Modified
-
utf8/core/lib/Contenido/Parser.pm
60 60 $content = <$fh>; 61 61 } 62 62 if ( $content ) { 63 warn Dumper($self); 64 63 unless ( $encoding ) { 65 64 $encoding = $self->__try_content_encoding( substr($content, 0, 350) ); 66 $self->{encoding} = $encoding; 67 65 if ( $encoding && $encoding ne 'utf-8' ) { 68 66 Encode::from_to($content, $encoding, 'utf-8'); 69 67 } 70 68 } 69 $self->{encoding} = $encoding; 70 warn Dumper($self) if $DEBUG; 71 71 $self->{content} = $content; 72 72 $self->{success} = 1; 73 73 } else { -
utf8/core/lib/Contenido/Parser/HTML.pm
147 147 my ($glue) = grep { $_->{command} eq 'glue' } @$parse_rools; 148 148 $self->__glue ( $chosen, $glue, $debug ) if ref $glue; 149 149 } 150 warn "Getting images...\n" if $debug; 150 151 my $images = $self->__get_images ( 151 152 structure => $shortcuts, 152 153 chosen => $chosen->[0], … … 288 289 sub __check_img_name { 289 290 my $name = shift; 290 291 my $test = $1 if $name =~ /\/([^\/]+)$/; 291 if ( $test =~ /\d+[x-]\d+/ || $test =~ /\.gif$/i ) { 292 if ( $test && ($test =~ /\d+[x-]\d+/ || $test =~ /\.gif$/i) ) { 292 293 return 1; 293 294 } 294 295 foreach my $word ( @PICNAME ) { 295 if ( $test =~ /^$word/si || $test =~ /[^a-z]$word[^a-z]/si ) { 296 if ( $test && ($test =~ /^$word/si || $test =~ /[^a-z]$word[^a-z]/si) ) { 296 297 return 1; 297 298 } 298 299 } … … 721 722 unless ( @ret ) { 722 723 warn "Nothing was found at all!!! Check your ROOLS or MINIMUM value" if $debug; 723 724 } 725 warn "Digging done!\n" if $debug; 724 726 return \@ret; 725 727 } 726 728 -
utf8/core/lib/Contenido/Parser/RSS.pm
51 51 my $debug = $DEBUG; 52 52 my $gui = delete $opts{gui}; 53 53 my $description_as_fulltext = delete $opts{description_as_fulltext}; 54 warn "Parser Rools: [".$opts{parser_rss}."]\n" if $debug; 54 warn "Parser Rools: [".$opts{parser_rss}."]\n" if $debug && $opts{parser_rss}; 55 55 56 56 my $rss_rools = $self->__parse_rools (delete $opts{parser_rss}); 57 57 58 warn "RSS Rools: ".Dumper ($rss_rools) if $debug; 58 warn "RSS Rools: ".Dumper ($rss_rools) if $debug && $rss_rools; 59 59 60 60 my @items; 61 61 my $feed = $self->__parse_content(\$content); … … 215 215 @videos = grep { exists $_->{type} && lc($_->{type}) eq 'video/x-flv' && $_->{src} =~ /\.flv$/i } @videos; 216 216 my @inlined_images; 217 217 for ( $description, $fulltext ) { 218 next unless $_; 218 219 my $field = $_; 219 220 while ( $field =~ /<img ([^>]+)>/sgi ) { 220 221 my $image = $self->__parse_params( $1 ); … … 1065 1066 while ($text =~ /<p>(.+?)(?=<\/?p>|$)/sgi) { 1066 1067 my $p = $1; 1067 1068 if (length $p > 50) { 1068 my ($dcount, $ndcount) = (); 1069 my ($dcount, $ndcount) = (0,0); 1069 1070 # Count sentences normally ended vs breaked 1070 1071 $dcount++ while $p =~ /(\.|\?|\!)['"]?\s*[\r\n]+/g; 1071 1072 $ndcount++ while $p =~ /([^\.\?\!\s])\s*[\r\n]+/g; -
utf8/core/lib/Contenido/Parser/Util.pm
30 30 my $text = shift; 31 31 32 32 if ( ref $text ) { 33 for ( $$text ) { 34 s/<\/?[^>]+>//sgi; 35 } 33 $$text =~ s/<\/?[^>]+>//sgi; 36 34 } else { 37 for ( $text ) { 38 s/<\/?[^>]+>//sgi; 39 } 35 $text =~ s/<\/?[^>]+>//sgi; 40 36 return $text; 41 37 } 42 38 }