1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/python3 -u
# -*- coding: utf-8 -*-
# Copyright (C) 2015- Tiago Stürmer Daitx <tiago.daitx@canonical.com>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
__author__ = 'Tiago Stürmer Daitx'
__version__ = '0.1'
import os
import sys
import argparse
import apt_pkg
from functools import cmp_to_key
from itertools import chain
def read_sources(filename, intern=sys.intern):
    """Read source packages and their binaries from an APT Sources file.

    Only the Package, Version and Binary fields of each stanza are kept,
    to limit memory use. Stanzas whose Extra-Source-Only field is 'yes'
    are skipped.

    Args:
        filename: path of the Sources file to parse.
        intern: callable used to de-duplicate package and version strings
            (defaults to sys.intern).

    Returns:
        A dictionary keyed by source package name. Each value is another
        dictionary mapping a version string to the set of binary package
        names listed in that stanza's Binary field.
    """
    sources = {}
    # Keep the file open only while parsing so the handle is not leaked.
    with open(filename) as sources_file:
        tagfile = apt_pkg.TagFile(sources_file)
        get_field = tagfile.section.get
        step = tagfile.step
        while step():
            if get_field('Extra-Source-Only', 'no') == 'yes':
                # Ignore sources only referenced by Built-Using
                continue
            pkg = intern(get_field('Package'))
            ver = intern(get_field('Version'))
            # Split on bare commas and strip each name: this also copes
            # with a Binary list folded over several lines or written
            # without a space after the comma. A stanza without a Binary
            # field contributes an empty set instead of raising.
            binaries = {
                name.strip()
                for name in get_field('Binary', '').split(',')
                if name.strip()
            }
            sources.setdefault(pkg, {})[ver] = binaries
    return sources
#return {k:v for (k,v) in sources.items() if len(v) > 1}
def read_binaries(file_in, ignore_packages=(), intern=sys.intern):
    """Index the stanzas of an APT Packages file, keeping one per package.

    If a package appears more than once, an already recorded stanza is
    kept only when its version is strictly greater than the new one;
    otherwise the later stanza replaces it. Packages whose name is in
    ignore_packages are skipped.

    Args:
        file_in: path of the Packages file to parse.
        ignore_packages: container of package names to skip; only
            membership tests are performed on it, so a set is the
            cheapest choice for large inputs.
        intern: callable used to de-duplicate package and version strings
            (defaults to sys.intern).

    Returns:
        A dictionary mapping each package name to a list
        [version, offset, size], where size is the stanza's byte length
        plus one for the separating newline and offset is the parser's
        current file offset minus that size.
    """
    packages = {}
    # Keep the file open only while parsing so the handle is not leaked.
    with open(file_in) as packages_file:
        tagfile = apt_pkg.TagFile(packages_file)
        section = tagfile.section
        get_field = section.get
        step = tagfile.step
        while step():
            pkg = get_field('Package')
            if pkg in ignore_packages:
                continue
            version = get_field('Version')
            # keep only latest version
            known = packages.get(pkg)
            if known is not None and apt_pkg.version_compare(known[0], version) > 0:
                continue
            size = section.bytes() + 1  # do include the new line
            # NOTE(review): assumes tagfile.offset() points just past the
            # stanza and its separator, so this is the stanza start -- confirm.
            offset = tagfile.offset() - size
            packages[intern(pkg)] = [intern(version), offset, size]
    return packages
def write_binaries(file_in, file_out, packages):
    """Copy the selected byte ranges of file_in into file_out.

    Both files are opened in binary mode; file_out is created or
    truncated. Ranges are written in the iteration order of packages.

    Args:
        file_in: path of the file to read the ranges from.
        file_out: path of the file to write.
        packages: dictionary whose values are sequences holding the byte
            offset at index 1 and the number of bytes to copy at index 2
            (index 0 and the keys are not used here).
    """
    with open(file_in, mode="rb") as fin, open(file_out, mode="wb") as output:
        for entry in packages.values():
            fin.seek(entry[1])
            output.write(fin.read(entry[2]))
def get_old_binaries(sources):
    """Collect binaries that only older versions of a source still list.

    Source packages with a single version are skipped. For every other
    source package, the binaries of all its versions are pooled and the
    binaries of its highest version (per apt_pkg.version_compare) are
    removed from that pool.

    Args:
        sources: dictionary mapping a source package name to a dictionary
            of version string -> set of binary package names.

    Returns:
        The set of binary package names that are not listed by the
        highest available version of their source package.
    """
    by_version = cmp_to_key(apt_pkg.version_compare)
    superseded = set()
    for versions in sources.values():
        if len(versions) <= 1:
            continue
        newest = max(versions, key=by_version)
        every_binary = set()
        for binaries in versions.values():
            every_binary.update(binaries)
        superseded.update(every_binary - versions[newest])
    return superseded
if __name__ == '__main__':
    # Script entry point: drop from each Packages file the binaries that
    # only older versions of a duplicated source package still list, and
    # keep a single stanza per remaining package.

    # initialize the apt_pkg back-end
    apt_pkg.init()

    parser = argparse.ArgumentParser()
    parser.add_argument("source", help="path to the APT Source file and Binary files")
    parser.add_argument("binaries", nargs="+", help="path to the APT Bines file(s) to be processed")
    # const makes a bare "-o" (no value) fall back to the default directory
    # instead of yielding None, which os.path.join() below would reject.
    parser.add_argument("-o", "--outdir", nargs="?", const="./", help="path to directory to write the processed binary files", default="./")
    args = parser.parse_args()

    ignore_packages = get_old_binaries(read_sources(args.source))

    for binary in args.binaries:
        output_filename = os.path.join(args.outdir, os.path.basename(binary))

        # When the output would land on the input itself, write to a
        # temporary file first and move it over the input afterwards.
        overwrite = (
            os.path.exists(binary)
            and os.path.exists(output_filename)
            and os.path.samefile(binary, output_filename)
        )
        if overwrite:
            # Hidden sibling of the target, in the same directory, so the
            # final move stays on one filesystem.
            output_filename = os.path.join(
                os.path.dirname(output_filename),
                ".%s.tmp" % os.path.basename(output_filename),
            )

        packages = read_binaries(binary, ignore_packages=ignore_packages)
        write_binaries(binary, output_filename, packages)

        if overwrite:
            # os.replace overwrites an existing destination on every platform.
            os.replace(output_filename, binary)
|