#!/usr/bin/env python
# -*- coding: utf-8 -*-

### BEGIN LICENSE
# Copyright © 2012 Vsevolod Velichko <torkvema@gmail.com>
# Copyright © 2012 Carlos da Costa <c.costa@outlook.com>
# Copyright © 2012 Erik Christiansson <erik@christiansson.net>
# This program is free software: you can redistribute it and/or modify it 
# under the terms of the GNU General Public License version 3, as published 
# by the Free Software Foundation.
# 
# This program is distributed in the hope that it will be useful, but 
# WITHOUT ANY WARRANTY; without even the implied warranties of 
# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR 
# PURPOSE.  See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along 
# with this program.  If not, see <http://www.gnu.org/licenses/>.
### END LICENSE

import re

try:  # Python 2
    import htmlentitydefs
except ImportError:  # Python 3 moved this module to html.entities
    from html import entities as htmlentitydefs

from dateutil.parser import parse as parsedate

from . import TrackingService

try:
    _unichr = unichr  # Python 2: chr() there only yields byte strings
except NameError:
    _unichr = chr  # Python 3: chr() already returns text


def unescape(text):
    """Replace HTML/XML character references and entities in *text*.

    Handles decimal (``&#65;``) and hexadecimal (``&#x41;`` / ``&#X41;``)
    character references as well as named entities found in the standard
    ``name2codepoint`` table (``&amp;``, ``&lt;``, ...).  Anything that
    cannot be resolved -- an unknown name, a malformed number, or a code
    point the interpreter cannot represent -- is left in the output
    exactly as it appeared in the input.

    :param text: The HTML (or XML) source text.
    :return: The text with every resolvable reference substituted.
    """
    def fixup(match):
        ref = match.group(0)
        try:
            # The "x" marker of a hex reference may be either case.
            if ref[:3].lower() == "&#x":
                return _unichr(int(ref[3:-1], 16))
            if ref[:2] == "&#":
                return _unichr(int(ref[2:-1]))
            return _unichr(htmlentitydefs.name2codepoint[ref[1:-1]])
        except (KeyError, ValueError, OverflowError):
            # KeyError: unknown entity name.  ValueError: digits that do not
            # parse, or a code point out of range.  OverflowError: a number
            # too large to even hand to chr()/unichr().
            return ref  # leave as is
    return re.sub(r"&#?\w+;", fixup, text)

class UspsComService(TrackingService):
    """Tracking service definition for the USPS.com track-and-confirm page.

    ``url`` is a %-style template with a single ``number`` key.
    NOTE(review): presumably the TrackingService base class fills in the
    template and fetches the page -- confirm against the base class.
    """
    name = "USPS.com"
    url = 'https://tools.usps.com/go/TrackConfirmAction_input?qtc_tLabels1=%(number)s&qtc_senddate1=&qtc_zipcode1='

    def _parse_page(self, html):
        """Extract the tracking events from a fetched results page.

        :param html: The page content; it must provide ``decode`` (it is
            decoded as UTF-8 with undecodable bytes dropped).
        :return: A list of ``(operation, date, location)`` tuples in page
            order, where ``date`` is whatever ``dateutil`` parses from the
            date/time cell.  Empty when the ``tc-hits`` table is not found.
        """
        page = html.decode('utf-8', 'ignore')

        # Only the body of the results table is scanned for event rows.
        table = re.search(r'<table id="tc-hits".*?<tbody>(.*?)</tbody>', page, re.DOTALL)
        if table is None:
            return []

        rows = re.findall(
            r'<div class="td-status">\s*<p>(.*?)</p>.*?'
            r'<div class="td-date-time">\s*<p>(.*?)</p>.*?'
            r'<div class="td-location">\s*<p>(.*?)</p>',
            table.group(1),
            re.DOTALL)

        return [
            (unescape(status.strip()),
             parsedate(unescape(when.strip())),
             unescape(where.strip()))
            for status, when, where in rows
        ]

