package HTML::FormatText;

# ABSTRACT: Format HTML as plaintext

use 5.006_001;
use strict;
use warnings;

# We now use Smart::Comments in place of the old DEBUG framework.
# This should be commented out in release versions.
##use Smart::Comments;

use base 'HTML::Formatter';

our $VERSION = '2.12'; # VERSION

# ------------------------------------------------------------------------
# Return the default attribute list for this formatter: whatever the parent
# class supplies via SUPER::default_values(), followed by the text-specific
# margin defaults "lm" and "rm".
#
# NOTE(review): the closing ");" of the list and the closing "}" of the sub
# are not present in this copy of the file.
sub default_values {
    (   shift->SUPER::default_values(),
        lm => 3,     # left margin
        rm => 72,    # right margin (actually, maximum text width)

# ------------------------------------------------------------------------
# Apply margin settings from the option hash $hash to the formatter.
#
# Recognised keys are "lm"/"leftmargin" and "rm"/"rightmargin"; each one that
# is present is removed from $hash, and the long spelling wins when both
# spellings are supplied because it is examined second.  The starting values
# are the margins already stored on $self.  All diagnostics go through warn()
# and are only emitted when $^W (the -w switch) is on.
#
# NOTE(review): the closing braces of the two "if" blocks, of the "for" loop
# and of the sub itself are absent from this copy of the file.  As shown there
# is also no early exit after "Bad margins, ignored" -- confirm against the
# released module whether bad margins are really meant to be discarded.
sub configure {
    my ( $self, $hash ) = @_;

    # Start from the current margins so an omitted option leaves them alone.
    my $lm = $self->{lm};
    my $rm = $self->{rm};

    # delete() both fetches the value and marks the key as consumed, so that
    # only unrecognised keys remain in $hash for the loop below.
    $lm = delete $hash->{lm}          if exists $hash->{lm};
    $lm = delete $hash->{leftmargin}  if exists $hash->{leftmargin};
    $rm = delete $hash->{rm}          if exists $hash->{rm};
    $rm = delete $hash->{rightmargin} if exists $hash->{rightmargin};

    # Sanity-check the usable text width (right margin minus left margin).
    my $width = $rm - $lm;
    if ( $width < 1 ) {
        warn "Bad margins, ignored" if $^W;
    if ( $width < 20 ) {
        warn "Page probably too narrow" if $^W;

    # Anything still left in $hash was not recognised above.
    for ( keys %$hash ) {
        warn "Unknown configure option '$_'" if $^W;

    # Store the resulting margins on the object.
    $self->{lm} = $lm;
    $self->{rm} = $rm;

# ------------------------------------------------------------------------
# Reset the per-document output state: column position, widest column reached
# and the pending-space flag all start at zero.
#
# NOTE(review): the closing "}" is not present in this copy of the file, and
# no call to the parent class is visible here -- confirm whether one is
# expected.
sub begin {
    my $self = shift;

    $self->{curpos} = 0;    # current output position.
    $self->{maxpos} = 0;    # highest value of $pos (used by header underliner)
    $self->{hspace} = 0;    # horizontal space pending flag

# ------------------------------------------------------------------------
# Hook named "end"; presumably the counterpart of begin() -- TODO confirm.
# NOTE(review): neither a body nor the closing "}" is present in this copy of
# the file, so what it does cannot be determined from here.
sub end {

# ------------------------------------------------------------------------
# Called with the heading level ($level) when a heading begins.
#
# Passes 1 + (6 - $level) * 0.4 to vspace(), so lower-numbered heading levels
# ask for a larger value, then resets maxpos so that the width reached by the
# heading text can be measured (header_end() reads it for the underline).
#
# NOTE(review): the closing "}" and any explicit return value are not present
# in this copy of the file.
sub header_start {
    my ( $self, $level ) = @_;

    $self->vspace( 1 + ( 6 - $level ) * 0.4 );
    $self->{maxpos} = 0;

# ------------------------------------------------------------------------
# Called with the heading level ($level) when a heading ends.
#
# Headings of level 1 and 2 are underlined: '=' for level 1 and '-' for
# level 2, repeated (maxpos - lm) times, i.e. the distance from the left
# margin to the widest column recorded since header_start() reset maxpos.
#
# NOTE(review): the closing braces of the "if" block and of the sub are not
# present in this copy of the file; statements may be missing as well.
sub header_end {
    my ( $self, $level ) = @_;

    if ( $level <= 2 ) {
        my $line;
        $line = '=' if $level == 1;
        $line = '-' if $level == 2;
        $self->out( $line x ( $self->{maxpos} - $self->{lm} ) );

# ------------------------------------------------------------------------
# Forward the bullet text (first argument) to the parent class's bullet()
# with a single space appended.
#
# NOTE(review): the closing "}" is not present in this copy of the file.
sub bullet {
    my $self = shift;

    $self->SUPER::bullet( $_[0] . ' ' );

# ------------------------------------------------------------------------
# Emit a horizontal rule: a run of '-' characters (rm - lm) long, sent
# through out().
#
# NOTE(review): the closing "}" is not present in this copy of the file;
# statements may be missing as well.
sub hr_start {
    my $self = shift;

    $self->out( '-' x ( $self->{rm} - $self->{lm} ) );

# ------------------------------------------------------------------------
# Handle a chunk of preformatted text (the argument after the invocant).
#
# NOTE(review): the closing braces of both "if" blocks and of the sub are not
# present in this copy of the file, and nothing in the visible lines emits
# $pre after it has been indented -- presumably it is handed to the output
# collector in the missing lines; confirm against the released module.
sub pre_out {
    my $self = shift;

    # should really handle bold/italic etc.
    # Flush pending vertical space, but only once something has already been
    # output ($self->{out} is true): nl() is called for as long as the
    # post-decremented counter is still >= 0.
    if ( defined $self->{vspace} ) {
        if ( $self->{out} ) {
            $self->nl() while $self->{vspace}-- >= 0;
            $self->{vspace} = undef;
    # Prefix every line of the text with lm spaces; /m makes ^ match at the
    # start of each line and /g applies the substitution to all of them.
    my $indent = ' ' x $self->{lm};
    my $pre    = shift;
    $pre =~ s/^/$indent/mg;

# ------------------------------------------------------------------------
# Emit one piece of ordinary (flowed) text, tracking the output column.
#
# State read and updated on $self: hspace (a space is pending), vspace
# (vertical space is pending), out (true once something has been output),
# curpos (current column) and maxpos (widest column reached).
#
# NOTE(review): this copy of the file is missing the closing braces of every
# block in this sub.  Several statements also appear to be missing: there is
# no visible return after the whitespace-only case, the "line break" branch
# has no body, and $text is never passed to collect() in the visible lines.
# Confirm against the released module before relying on the flow shown here.
sub out {
    my $self = shift;
    my $text = shift;

    # \xA0 is turned into a plain space; \xAD has no replacement character
    # and is therefore removed by the /d flag.
    $text =~ tr/\xA0\xAD/ /d;

    # Text that is empty or whitespace-only just records a pending space.
    if ( $text =~ /^\s*$/ ) {
        $self->{hspace} = 1;

    # Flush pending vertical space; newlines are only produced once something
    # has already been output.  The pending-space flag is cleared as well.
    if ( defined $self->{vspace} ) {
        if ( $self->{out} ) {
            $self->nl while $self->{vspace}-- >= 0;
        $self->{vspace} = undef;
        $self->{hspace} = 0;

    # A pending space becomes either a line break or a single space, depending
    # on whether the text would run past the right margin.
    if ( $self->{hspace} ) {
        if ( $self->{curpos} + length($text) > $self->{rm} ) {

            # word will not fit on line; do a line break
        else {

            # word fits on line; use a space
            $self->collect(' ');
        $self->{hspace} = 0;

    # Advance the column by the length of the text and remember the widest
    # column seen (header_end() uses maxpos to size the underline).
    my $pos = $self->{curpos} += length $text;
    $self->{maxpos} = $pos if $self->{maxpos} < $pos;

# ------------------------------------------------------------------------
# Move the output position to the left margin: when the current column is
# left of lm, set curpos to lm and collect the spaces needed to get there.
# Nothing happens when the position is already at or beyond the margin.
#
# NOTE(review): the closing braces of the "if" block and of the sub are not
# present in this copy of the file.
sub goto_lm {
    my $self = shift;

    my $pos = $self->{curpos};
    my $lm  = $self->{lm};
    if ( $pos < $lm ) {
        $self->{curpos} = $lm;
        $self->collect( " " x ( $lm - $pos ) );

# ------------------------------------------------------------------------
# Start a new output line: the column counter goes back to 0.
#
# NOTE(review): the closing "}" is not present in this copy of the file, and
# the visible lines do not emit a newline character -- presumably that
# happens in the missing lines; confirm against the released module.
sub nl {
    my $self = shift;

    $self->{curpos} = 0;

# ------------------------------------------------------------------------
# Shift the left margin by the given amount (the first argument after the
# invocant is added to lm).
#
# NOTE(review): the closing "}" is not present in this copy of the file;
# statements may be missing as well.
sub adjust_lm {
    my $self = shift;

    $self->{lm} += $_[0];

# ------------------------------------------------------------------------
# Shift the right margin: $_[0] is added to rm on the invocant taken off @_
# with shift.
#
# NOTE(review): the closing "}" is not present in this copy of the file, and
# no true value ("1;") ending the module is visible before the POD.
sub adjust_rm {
    shift->{rm} += $_[0];




=for test_synopsis 1;

=for stopwords latin1 leftmargin lm plaintext rightmargin rm CPAN homepage

=head1 NAME

HTML::FormatText - Format HTML as plaintext

=head1 VERSION

version 2.12

=head1 SYNOPSIS

    use HTML::TreeBuilder;
    $tree = HTML::TreeBuilder->new->parse_file("test.html");

    use HTML::FormatText;
    $formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 50);
    print $formatter->format($tree);

or, more simply:

    use HTML::FormatText;
    my $string = HTML::FormatText->format_file(
        'test.html',
        leftmargin => 0, rightmargin => 50
    );


=head1 DESCRIPTION

HTML::FormatText is a formatter that outputs plain latin1 text. All character
attributes (bold/italic/underline) are ignored. Formatting of HTML tables and
forms is not implemented.

HTML::FormatText is built on L<HTML::Formatter> and documentation for that
module applies to this - especially L<HTML::Formatter/new>,
L<HTML::Formatter/format_file> and L<HTML::Formatter/format_string>.

You might specify the following parameters when constructing the formatter:

=over 4

=item I<leftmargin> (alias I<lm>)

The column of the left margin. The default is 3.

=item I<rightmargin> (alias I<rm>)

The column of the right margin. The default is 72.

=back

=head1 SEE ALSO

L<HTML::Formatter>


=head1 INSTALLATION

See perlmodinstall for information and options on installing Perl modules.


=head1 BUGS AND LIMITATIONS

You can make new bug reports, and view existing ones, through the
web interface at L<http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Format>.


=head1 AVAILABILITY

The project homepage is L<https://metacpan.org/release/HTML-Format>.

The latest version of this module is available from the Comprehensive Perl
Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
site near you, or see L<https://metacpan.org/module/HTML::Format/>.

=head1 AUTHORS

=over 4

=item *

Nigel Metheringham <nigelm@cpan.org>

=item *

Sean M Burke <sburke@cpan.org>

=item *

Gisle Aas <gisle@ActiveState.com>

=back


=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2015 by Nigel Metheringham, 2002-2005 Sean M Burke, 1999-2002 Gisle Aas.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.



