Skip to content

Commit b6a5357

Browse files
committed
Add route, view and helpers for exporting d/copyright to spdx
1 parent e634464 commit b6a5357

File tree

7 files changed

+279
-4
lines changed

7 files changed

+279
-4
lines changed

debsources/app/copyright/routes.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@
1212
from __future__ import absolute_import
1313

1414

15-
from flask import jsonify
15+
from flask import jsonify, make_response
1616

1717
from ..helper import bind_render
1818
from . import bp_copyright
1919
from ..views import (IndexView, PrefixView, ListPackagesView, ErrorHandler,
2020
Ping, PackageVersionsView, DocView, AboutView, SearchView)
21-
from .views import LicenseView, ChecksumLicenseView, SearchFileView, StatsView
21+
from .views import (LicenseView, ChecksumLicenseView, SearchFileView,
22+
StatsView, SPDXView)
2223

2324

2425
# context vars
@@ -254,3 +255,11 @@ def skeleton_variables():
254255
render_func=jsonify,
255256
err_func=ErrorHandler(mode='json'),
256257
get_objects='stats_suite'))
258+
259+
# SPDX view
260+
bp_copyright.add_url_rule(
261+
'/spdx/<path:path_to>/',
262+
view_func=SPDXView.as_view(
263+
'spdx',
264+
render_func=make_response,
265+
err_func=ErrorHandler('copyright')))
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{#
2+
Copyright (C) 2015 The Debsources developers <info@sources.debian.net>.
3+
See the AUTHORS file at the top-level directory of this distribution and at
4+
https://anonscm.debian.org/gitweb/?p=qa/debsources.git;a=blob;f=AUTHORS;hb=HEAD
5+
License: GNU Affero General Public License, version 3 or above.
6+
#}
7+
{% extends name+"/base.html" %}
8+
9+
{% block title %}404{% endblock %}
10+
{% block content %}
11+
<h2>{{ self.title() }}</h2>
12+
<p>The debian/copyright file has a file paragraph without the <b>required</b> copyright field. The files paragraph is:
13+
<ul>
14+
{% for files in paragraph %}
15+
<li>{{ files }}</li>
16+
{% endfor %}
17+
</ul>
18+
</p>
19+
<a href="/">Go home</a>
20+
21+
{% endblock %}

debsources/app/copyright/templates/copyright/license.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ <h2>{{ self.title() }} / {{ version }}</h2>
2727
{% if dump == 'True' %}
2828
{% include "source_file_code.inc.html" %}
2929
{% else %}
30+
<div class="warning"><a href="{{url_for('.spdx', path_to=package + '/' + version) }}">Export to SPDX</a></div>
3031
{% include "copyright/license_render.inc.html" %}
3132
{% endif %}
3233
{% endblock %}

debsources/app/copyright/views.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,55 @@ def get_stats(self):
339339
dual_results=dual_res,
340340
dual_licenses=sorted(dual_licenses),
341341
suites=all_suites)
342+
343+
344+
class SPDXView(GeneralView):
    """ Export the debian/copyright file of a package version to SPDX

    Meant to be registered with `make_response` as render function:
    `get_objects` hands back the document under the `spdx` key and the
    Content-Disposition value under the `header` key.
    """

    def _generate_file(self, spdx_values):
        """ Concatenate the SPDX entries, terminating each with a newline

        Entries may be UTF-8 byte strings or already decoded text. Only byte
        strings are decoded: calling `.decode` on text goes through an
        implicit ASCII encoding on Python 2 and fails on non-ASCII
        characters.
        """
        lines = []
        for value in spdx_values:
            if isinstance(value, bytes):
                value = value.decode('utf-8')
            lines.append(value + '\n')
        return ''.join(lines)

    def get_objects(self, path_to):
        """ Build the SPDX export for `path_to`, i.e. "<package>/<version>"

        Returns a redirection when the version is "latest" or when
        `handle_versions` yields versions, otherwise a dict with the keys
        `spdx` (file content) and `header` (Content-Disposition value).

        Raises Http404Error when `path_to` is not exactly
        "<package>/<version>" or when debian/copyright cannot be parsed, and
        Http404ErrorSuggestions when the sources cannot be located.
        """
        parts = path_to.split('/')
        if len(parts) != 2:
            raise Http404Error()
        package, version = parts
        # exactly two components were required above, so there is never a
        # path below the version
        path = ''

        if version == "latest":  # we search the latest available version
            return self._handle_latest_version(request.endpoint,
                                               package, path)

        versions = self.handle_versions(version, package, path)
        if versions:
            redirect_url = '/'.join([package, versions[-1]])
            return self._redirect_to_url(request.endpoint,
                                         redirect_url, redirect_code=302)

        try:
            sources_path = helper.get_sources_path(session, package, version,
                                                   current_app.config)
        except FileOrFolderNotFound:
            raise Http404ErrorSuggestions(package, version,
                                          'debian/copyright')
        except InvalidPackageOrVersionError:
            raise Http404ErrorSuggestions(package, version, '')

        try:
            c = helper.parse_license(sources_path)
        except Exception:
            # non machine readable license. The response is built from the
            # 'spdx' and 'header' keys, so a dict without them cannot be
            # rendered: report the 404 through the error handler instead.
            raise Http404Error()

        spdx = helper.export_copyright_to_spdx(
            c, session=session, package=package, version=version)
        attachment = ("attachment;" + "filename=" +
                      path_to.replace('/', '_') + ".spdx")
        return dict(spdx=self._generate_file(spdx),
                    header=attachment)

debsources/app/views.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@
1919
from debian.debian_support import version_compare
2020

2121
from flask import (
22-
current_app, jsonify, render_template, request, url_for, redirect)
22+
current_app, jsonify, render_template, request, url_for, redirect,
23+
make_response)
2324
from flask.views import View
2425

2526
from debsources.excepts import (
2627
Http500Error, Http404Error, Http404ErrorSuggestions, Http403Error,
27-
InvalidPackageOrVersionError, Http404MissingCopyright)
28+
InvalidPackageOrVersionError, Http404MissingCopyright,
29+
MissingCopyrightField)
2830
from debsources.models import Package, SuiteAlias
2931
import debsources.query as qry
3032
from debsources.sqla_session import _close_session
@@ -129,6 +131,9 @@ def error_404(self, error):
129131
else:
130132
return render_template('copyright/404_missing.html',
131133
suggestions=suggestions), 404
134+
elif isinstance(error, MissingCopyrightField):
135+
return render_template('copyright/404_missing_copyright.html',
136+
paragraph=error.par)
132137
else:
133138
return render_template('404.html'), 404
134139

@@ -192,6 +197,10 @@ def dispatch_request(self, **kwargs):
192197
"""
193198
try:
194199
context = self.get_objects(**kwargs)
200+
if self.render_func is make_response:
201+
response = make_response(context['spdx'])
202+
response.headers["Content-Disposition"] = context['header']
203+
return response
195204
return self.render_func(**context)
196205
except Http403Error as e:
197206
return self.err_func(e, http=403)

debsources/excepts.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,11 @@ def __init__(self, package, version, path):
4646

4747
class Http403Error(Exception):
4848
pass
49+
50+
51+
class MissingCopyrightField(Http404Error):
    """ 404 raised when a paragraph of debian/copyright has no copyright

    The instance keeps the `package` and `version` being processed and
    `par`, the value describing the offending paragraph.
    """

    def __init__(self, package, version, par):
        self.package, self.version, self.par = package, version, par
        super(MissingCopyrightField, self).__init__()

debsources/license_helper.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,15 @@
1212
import io
1313
import logging
1414
import re
15+
import hashlib
16+
from datetime import datetime
1517

1618
from flask import url_for
1719
from debian import copyright
1820

21+
from debsources.models import Checksum, File, Package, PackageName
1922
from debsources.navigation import Location, SourceFile
23+
from debsources.excepts import MissingCopyrightField
2024

2125
# import debsources.query as qry
2226

@@ -134,6 +138,10 @@ def get_license(session, package, version, path, license_path=None):
134138
return None
135139

136140

141+
def get_paragraph(c, path):
    """ Return what `c.find_files_paragraph` yields for `path`, `c` being
    the parsed copyright object
    """
    paragraph = c.find_files_paragraph(path)
    return paragraph
143+
144+
137145
def get_copyright_header(copyright):
138146
""" Return all the header attributs
139147
@@ -197,6 +205,8 @@ def create_url(glob="", base=None,):
197205
def match_license(synopsis):
198206
""" Matches a `synopsis` with a license and creates a url
199207
"""
208+
if any(keyword in synopsis for keyword in ['with', 'exception']):
209+
return None
200210
key = filter(lambda x: re.search(x, synopsis) is not None, Licenses)
201211
if len(key) is not 0:
202212
return Licenses[key[0]]
@@ -241,3 +251,168 @@ def anchor_to_license(copyright, synopsis):
241251
return '#license-' + str(licenses.index(synopsis))
242252
else:
243253
return None
254+
255+
256+
def export_copyright_to_spdx(c, package, version, session):
    """ Build the SPDX (tag/value) document of a debian/copyright file

    `c` is the parsed copyright object, `package` and `version` identify the
    source package whose files are looked up through the DB `session`.

    Returns the document as a list of strings, one SPDX entry per item (an
    entry may span several lines). Nothing is written to disk.

    Exceptions raised by get_files_spdx (e.g. MissingCopyrightField) are not
    caught here.
    """

    def create_package_code(session, package, version):
        """ Return the SHA256 hex digest of the concatenation of the sorted
        SHA256 sums of the package files
        """
        rows = (session.query(Checksum.sha256.label("sha256"))
                .filter(Checksum.package_id == Package.id)
                .filter(Checksum.file_id == File.id)
                .filter(Package.name_id == PackageName.id)
                .filter(PackageName.name == package)
                .filter(Package.version == version)
                .order_by("sha256")
                ).all()
        # a missing checksum cannot be joined, leave it out
        sha_values = [row[0] for row in rows if row[0]]
        return hashlib.sha256("".join(sha_values)).hexdigest()

    def create_license_ref(license, count, refs, unknown):
        """ Creates license references and adds it in the specific
        dictionary. Also adds the non standard licenses in unknown
        licenses.
        """
        if license and license not in refs:
            if not match_license(license):
                l_id = 'LicenseRef-' + str(count)
                refs[license] = l_id
                count += 1
                # name the entry after this very license, not after the
                # whole synopsis it was extracted from
                unknown[license] = ("LicenseID: " + l_id +
                                    "\nLicenseName: " + license)
            else:
                # useful in PackageLicenseInfoFromFiles
                refs[license] = license

        return refs, unknown, count

    # 'and' / 'or' separate licenses only as whole words: a substring test
    # would also cut names such as "custom-vendor license" in two
    separators = re.compile(r', |(?:^|(?<=\s))(?:and|or) ')

    # set upstream name for native packages
    if c.header.upstream_name is not None:
        upstream_name = c.header.upstream_name
    else:
        upstream_name = package

    # find out which are not standard and save SPDX required information
    # Non standard licenses are referenced as LicenseRef-<number>
    refs = dict()
    count = 0
    unknown = dict()
    for par in c.all_files_paragraphs():
        try:
            synopsis = par.license.synopsis
        except (AttributeError, ValueError):
            # no usable license in this paragraph: nothing to reference
            continue
        for name in separators.split(synopsis):
            refs, unknown, count = create_license_ref(name.strip(), count,
                                                      refs, unknown)

    # add the available extracted license text for unknown licenses
    for par in c.all_license_paragraphs():
        try:
            synopsis = par.license.synopsis
            text = par.license.text
        except (AttributeError, ValueError):
            continue
        if synopsis in refs and not match_license(synopsis):
            unknown[synopsis] = ("LicenseID: " + refs[synopsis] +
                                 "\nExtractedText: <text>" +
                                 text + "</text>" +
                                 "\nLicenseName: " + synopsis)

    # the trailing 'Z' announces UTC, so the time has to be taken in UTC
    now = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')

    # NOTE(review): the DocumentNamespace below is a fixed example value;
    # the SPDX specification expects one unique to each document -- confirm
    # and generate it if needed.
    spdx = ["SPDXVersion: SPDX-2.0", "DataLicense: CC0-1.0",
            "SPDXID: SPDXRef-DOCUMENT",
            "Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-Package",
            "DocumentName: " + upstream_name,
            "DocumentNamespace: http://spdx.org/spdxdocs/" +
            "spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301",
            "LicenseListVersion: 2.0",
            "Creator: Person: Debsources",
            "Creator: Organization: Debsources",
            "Creator: Tool: Debsources",
            "Created: " + now,
            "CreatorComment: <text> This document was created by " +
            "Debsources by parsing the respective debian/copyright " +
            "file of the package provided by the Debian project. You " +
            "may follow these links: http://debian.org/ " +
            "http://sources.debian.net/ to get more information about " +
            "Debian and Debsources. </text>",
            "DocumentComment: <text>This document was created using " +
            "SPDX 2.0, version 2.3 of the SPDX License List.</text>",
            "PackageName: " + upstream_name,
            "SPDXID: SPDXRef-Package",
            "PackageDownloadLocation: NOASSERTION",
            "PackageVerificationCode: " + create_package_code(session,
                                                              package,
                                                              version),
            "PackageLicenseConcluded: NOASSERTION"]
    for value in set(refs.values()):
        spdx.append("PackageLicenseInfoFromFiles: " + value)

    spdx.extend(["PackageLicenseDeclared: NOASSERTION",
                 "PackageCopyrightText: NOASSERTION"])
    for files in get_files_spdx(refs, package, version, session, c):
        for item in files:
            spdx.append(str(item))
    spdx.extend(unknown.values())
    return spdx
369+
370+
371+
def get_files_spdx(refs, package, version, session, c):
    """ Get all files from the DB for a specific package and version and
    then create the SPDX entries of each of them

    `refs` maps license names to their SPDX reference, `c` is the parsed
    copyright object used to find the paragraph applying to each file.

    Returns a list holding, for every file, the list of its SPDX entries.

    Raises MissingCopyrightField when the paragraph applying to a file has
    no copyright.
    """
    # Longest names first and a single pass over the text: a short name can
    # then neither cut a longer one ("foo" inside "foo-bar") nor be looked
    # for inside a reference that has just been inserted.
    names = sorted((name for name in refs if name), key=len, reverse=True)
    if names:
        names_re = re.compile('|'.join(re.escape(name) for name in names))
    else:
        names_re = None

    def replace_all(text):
        """ Replace all occurrences of the keys of refs by the corresponding
        value
        """
        if names_re is None:
            return text
        return names_re.sub(lambda match: refs[match.group(0)], text)

    files = (session.query(Checksum.sha256.label("sha256"),
                           File.path.label("path"))
             .filter(Checksum.package_id == Package.id)
             .filter(Checksum.file_id == File.id)
             .filter(Package.name_id == PackageName.id)
             .filter(PackageName.name == package)
             .filter(Package.version == version)
             )

    files_info = []

    for i, f in enumerate(files.all()):
        par = get_paragraph(c, f.path)

        try:
            synopsis = par.license.synopsis
        except (AttributeError, ValueError):
            synopsis = None
        if not synopsis:
            # SPDX only knows NONE and NOASSERTION as special values
            license_concluded = "NOASSERTION"
        elif match_license(synopsis):
            license_concluded = synopsis
        else:
            license_concluded = replace_all(synopsis)

        if par is None:
            # no paragraph applies to this file, hence there is no
            # paragraph to blame for a missing copyright
            copyright_text = "NOASSERTION"
        else:
            try:
                copyright_text = "<text>" + \
                    par.copyright.encode('utf-8') + "</text>"
            except AttributeError:
                raise MissingCopyrightField(package, version, par.files)

        # NOASSERTION means that the SPDX generator did not calculate that
        # value.
        sha = f.sha256 if f.sha256 else 'NOASSERTION'
        files_info.append(["FileName: " + f.path,
                           "SPDXID: SPDX-FILE-REF-" + str(i),
                           "FileChecksum: SHA256: " + sha,
                           "LicenseConcluded: " + license_concluded,
                           "LicenseInfoInFile: NOASSERTION",
                           "FileCopyrightText: " + copyright_text])
    return files_info

0 commit comments

Comments
 (0)