gbp/rpm/changelog.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243

# vim: set fileencoding=utf-8 :
#
# (C) 2014-2015 Intel Corporation <markus.lehtonen@linux.intel.com>
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, please see
#    <http://www.gnu.org/licenses/>
"""An RPM Changelog"""

import datetime
import re

import gbp.log


class ChangelogError(Exception):
    """Raised when an RPM changelog cannot be read, parsed or formatted"""


class _ChangelogHeader(object):
    """Header data of a single changelog section

    Stores the time stamp plus any additional keyword properties and renders
    them with the header format strings of the packaging policy.
    """

    def __init__(self, pkgpolicy, time=None, **kwargs):
        """
        @param pkgpolicy: RPM packaging policy
        @param time: time stamp of the section, stored under key C{'time'}
        @param kwargs: further header properties, stored under their own names
        """
        self._pkgpolicy = pkgpolicy
        properties = {'time': time}
        properties.update(kwargs)
        self._data = properties

    def __contains__(self, key):
        """Tell whether header property C{key} has been set"""
        return key in self._data

    def __getitem__(self, key):
        """Return header property C{key}, or C{None} if it is not set"""
        return self._data.get(key)

    def __str__(self):
        """Render the header line, terminated by a newline"""
        timestamp = self._data['time'].strftime(
            self._pkgpolicy.Changelog.header_time_format)
        # Same properties as stored, only with the time stamp pre-formatted
        values = dict(self._data, time=timestamp)
        try:
            rendered = self._pkgpolicy.Changelog.header_format % values
            return rendered + '\n'
        except KeyError as err:
            # The policy's format string referenced a property we don't have
            raise ChangelogError("Unable to format changelog header, missing "
                                 "property %s" % err)


class _ChangelogEntry(object):
    """An entry (one 'change') in an RPM changelog"""

    def __init__(self, pkgpolicy, author, text):
        """
        @param pkgpolicy: RPM packaging policy
        @type pkgpolicy: L{RpmPkgPolicy}
        @param author: author of the change
        @type author: C{str}
        @param text: message of the changelog entry; trailing empty lines
            are dropped. A list passed in is copied, never modified.
            C{None} is treated as an entry without any lines.
        @type text: C{str} or C{list} of C{str}
        """
        self._pkgpolicy = pkgpolicy
        self.author = author
        if isinstance(text, str):
            lines = text.splitlines()
        else:
            # Copy so that stripping below never alters the caller's list
            lines = list(text or [])
        # Strip trailing empty lines. This must operate on the list of lines,
        # not on the original argument: a str has no pop() and indexing it
        # would inspect single characters instead of whole lines.
        while lines and not lines[-1].strip():
            lines.pop()
        self._text = lines

    def __str__(self):
        """Return the raw entry text, one newline-terminated line per item"""
        # Currently no (re-)formatting, just raw text
        return "".join(line + '\n' for line in self._text)


class _ChangelogSection(object):
    """A changelog section: one header followed by a list of entries"""

    def __init__(self, pkgpolicy, *args, **kwargs):
        """
        @param pkgpolicy: RPM packaging policy
        @param args: positional arguments passed on to the section header
        @param kwargs: keyword arguments passed on to the section header
        """
        self._pkgpolicy = pkgpolicy
        # Text emitted after the last entry, i.e. the "section separator"
        self._trailer = '\n'
        self.entries = []
        self.header = _ChangelogHeader(pkgpolicy, *args, **kwargs)

    def __str__(self):
        """Return header, all entries and the section separator as text"""
        pieces = [str(self.header)]
        pieces.extend(str(item) for item in self.entries)
        pieces.append(self._trailer)
        return "".join(pieces)

    def set_header(self, *args, **kwargs):
        """Replace the section header with a newly created one"""
        new_header = _ChangelogHeader(self._pkgpolicy, *args, **kwargs)
        self.header = new_header

    def append_entry(self, entry):
        """Append C{entry} after the existing entries and return it"""
        self.entries.append(entry)
        return entry


class Changelog(object):
    """An RPM changelog, i.e. an ordered list of sections"""

    def __init__(self, pkgpolicy):
        """
        @param pkgpolicy: RPM packaging policy handed to created objects
        """
        self._pkgpolicy = pkgpolicy
        self.sections = []

    def __str__(self):
        """Return the text of all sections, concatenated in list order"""
        return "".join(str(item) for item in self.sections)

    def create_entry(self, *args, **kwargs):
        """Build a new entry object; it is not attached to any section"""
        new_entry = _ChangelogEntry(self._pkgpolicy, *args, **kwargs)
        return new_entry

    def add_section(self, *args, **kwargs):
        """Create an empty section, put it first in the list and return it"""
        new_section = _ChangelogSection(self._pkgpolicy, *args, **kwargs)
        self.sections.insert(0, new_section)
        return new_section


class ChangelogParser(object):
    """Parser for RPM changelogs

    The regular expressions driving the parser are read from the
    C{Changelog} attribute of the packaging policy given to the constructor.
    """

    def __init__(self, pkgpolicy):
        """
        @param pkgpolicy: RPM packaging policy providing the parsing regexps
        @type pkgpolicy: L{RpmPkgPolicy}
        """
        self._pkgpolicy = pkgpolicy
        self.section_match_re = pkgpolicy.Changelog.section_match_re
        self.section_split_re = pkgpolicy.Changelog.section_split_re
        self.header_split_re = pkgpolicy.Changelog.header_split_re
        self.header_name_split_re = pkgpolicy.Changelog.header_name_split_re
        self.body_name_re = pkgpolicy.Changelog.body_name_re

    def raw_parse_string(self, string):
        """
        Parse changelog - only splits out raw changelog sections.

        @param string: changelog text
        @type string: C{str}
        @return: changelog whose C{sections} are the raw (unparsed) section
            strings, in the order they appear in the text
        @rtype: L{Changelog}
        @raise ChangelogError: if text precedes the first section header
        """
        changelog = Changelog(self._pkgpolicy)
        ch_section = ""
        for line in string.splitlines():
            if re.match(self.section_match_re, line, re.M | re.S):
                # A new section starts: store the one collected so far
                if ch_section:
                    changelog.sections.append(ch_section)
                ch_section = line + '\n'
            elif ch_section:
                ch_section += line + '\n'
            else:
                # No section has been opened yet, so this line is stray
                raise ChangelogError("First line in changelog is invalid")
        if ch_section:
            changelog.sections.append(ch_section)
        return changelog

    def raw_parse_file(self, changelog):
        """
        Parse changelog file - only splits out raw changelog sections.

        @param changelog: path of the changelog file
        @return: result of L{raw_parse_string} on the file content
        @raise ChangelogError: if the file cannot be read
        """
        try:
            with open(changelog) as ch_file:
                return self.raw_parse_string(ch_file.read())
        except IOError as err:
            raise ChangelogError("Unable to read changelog file: %s" % err)

    def _parse_section_header(self, text):
        """
        Parse one changelog section header

        @param text: header text of the section
        @type text: C{str}
        @return: new section (without entries) carrying the parsed header
        @raise ChangelogError: if the header, its time stamp or its
            name / revision part cannot be parsed
        """
        # Try to split out time stamp and "changelog name"
        match = re.match(self.header_split_re, text, re.M)
        if not match:
            raise ChangelogError("Unable to parse changelog header: %s" % text)
        try:
            time = datetime.datetime.strptime(match.group('ch_time'),
                                              "%a %b %d %Y")
        except ValueError:
            raise ChangelogError("Unable to parse changelog header: invalid "
                                 "timestamp '%s'" % match.group('ch_time'))
        # Parse "name" part which consists of name and/or email and an optional
        # revision
        name_text = match.group('ch_name')
        match = re.match(self.header_name_split_re, name_text)
        if not match:
            raise ChangelogError("Unable to parse changelog header: invalid "
                                 "name / revision '%s'" % name_text)
        # All named groups of the regexp become header properties
        kwargs = match.groupdict()
        return _ChangelogSection(self._pkgpolicy, time=time, **kwargs)

    def _create_entry(self, author, text):
        """Create a new changelog entry"""
        return _ChangelogEntry(self._pkgpolicy, author=author, text=text)

    def _parse_section_entries(self, text, default_author):
        """
        Parse entries from a string

        A line matching C{body_name_re} switches the author for the entries
        that follow it. A line starting with a dash begins a new entry. Any
        other line is a continuation of the current entry.

        @param text: body text of one changelog section
        @type text: C{str}
        @param default_author: author used until an author line is seen
        @return: list of parsed entries, in order of appearance
        """
        entries = []
        entry_text = []
        author = default_author
        for line in text.splitlines():
            match = re.match(self.body_name_re, line)
            if match:
                if entry_text:
                    entries.append(self._create_entry(author, entry_text))
                    # Start afresh, otherwise the entry just stored would be
                    # emitted a second time under the new author
                    entry_text = []
                author = match.group('name')
            elif line.startswith("-"):
                if entry_text:
                    entries.append(self._create_entry(author, entry_text))
                entry_text = [line]
            else:
                if not entry_text:
                    gbp.log.info("First changelog entry (%s) is garbled, "
                                 "entries should start with a dash ('-')" %
                                 line)
                entry_text.append(line)
        if entry_text:
            entries.append(self._create_entry(author, entry_text))

        return entries

    def parse_section(self, text):
        """
        Parse one section

        @param text: raw text of one changelog section
        @type text: C{str}
        @return: section with parsed header and entries
        @raise ChangelogError: if the text does not look like a changelog
            section or its header cannot be parsed
        """
        # Check that the first line(s) look like a changelog header
        match = re.match(self.section_split_re, text, re.M | re.S)
        if not match:
            # Empty text has no first line to show in the error message
            lines = text.splitlines()
            first_line = lines[0] if lines else ""
            raise ChangelogError("Doesn't look like changelog header: %s..." %
                                 first_line)
        # Parse header
        section = self._parse_section_header(match.group('ch_header'))
        header = section.header
        # Parse entries
        # NOTE(review): the 'email' fallback is only used when the header has
        # no 'name' key at all, not when 'name' is present but empty - confirm
        # this is intended
        default_author = header['name'] if 'name' in header else header['email']
        for entry in self._parse_section_entries(match.group('ch_body'),
                                                 default_author):
            section.append_entry(entry)

        return section