Skip to content

Commit 56f332e

Browse files
committed
Add route, view and helpers for exporting d/copyright to spdx
1 parent e634464 commit 56f332e

File tree

5 files changed

+234
-3
lines changed

5 files changed

+234
-3
lines changed

debsources/app/copyright/routes.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@
1212
from __future__ import absolute_import
1313

1414

15-
from flask import jsonify
15+
from flask import jsonify, make_response
1616

1717
from ..helper import bind_render
1818
from . import bp_copyright
1919
from ..views import (IndexView, PrefixView, ListPackagesView, ErrorHandler,
2020
Ping, PackageVersionsView, DocView, AboutView, SearchView)
21-
from .views import LicenseView, ChecksumLicenseView, SearchFileView, StatsView
21+
from .views import (LicenseView, ChecksumLicenseView, SearchFileView,
22+
StatsView, SPDXView)
2223

2324

2425
# context vars
@@ -254,3 +255,11 @@ def skeleton_variables():
254255
render_func=jsonify,
255256
err_func=ErrorHandler(mode='json'),
256257
get_objects='stats_suite'))
258+
259+
# SPDX view
260+
# The view is rendered through make_response (not a template or jsonify);
# errors are reported through the JSON error handler.
bp_copyright.add_url_rule(
    '/spdx/<path:path_to>/',
    view_func=SPDXView.as_view('spdx',
                               render_func=make_response,
                               err_func=ErrorHandler(mode='json')))

debsources/app/copyright/templates/copyright/license.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ <h2>{{ self.title() }} / {{ version }}</h2>
2727
{% if dump == 'True' %}
2828
{% include "source_file_code.inc.html" %}
2929
{% else %}
30+
<div class="warning"><a href="{{url_for('.spdx', path_to=package + '/' + version) }}">Export to SPDX</a></div>
3031
{% include "copyright/license_render.inc.html" %}
3132
{% endif %}
3233
{% endblock %}

debsources/app/copyright/views.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,53 @@ def get_stats(self):
339339
dual_results=dual_res,
340340
dual_licenses=sorted(dual_licenses),
341341
suites=all_suites)
342+
343+
344+
class SPDXView(GeneralView):
    """Export the debian/copyright of a package version as an SPDX file.

    On success ``get_objects`` returns a dict with two keys: ``spdx`` (the
    text of the document) and ``header`` (the value meant for the
    ``Content-Disposition`` response header).
    """

    def _generate_file(self, spdx_values):
        """Join the SPDX entries into a single text, one entry per line.

        Byte strings are decoded as UTF-8. Values that are already text are
        used unchanged: calling ``decode`` on them would force an implicit
        ASCII encode on Python 2 and fail on any non-ASCII character (e.g.
        in an extracted license text).
        """
        lines = [value.decode('utf-8') if isinstance(value, bytes) else value
                 for value in spdx_values]
        return ''.join(line + '\n' for line in lines)

    def get_objects(self, path_to):
        """Build the SPDX export for the ``package/version[/path]`` URL part.

        :param path_to: slash separated string; the first component is the
            package name, the second the version, the rest an optional path.
        :returns: ``dict(spdx=<document text>, header=<attachment header>)``,
            or whatever ``_handle_latest_version`` / ``_redirect_to_url``
            return when the version is ``latest`` or ``handle_versions``
            yields versions to redirect to.
        :raises Http404ErrorSuggestions: when the sources cannot be located,
            the package or version is invalid, or debian/copyright cannot be
            parsed.
        """
        # NOTE(review): a `path_to` holding only a package name (no '/')
        # makes `path_dict[1]` raise IndexError -- confirm whether the
        # routing can produce such a value.
        path_dict = path_to.split('/')

        package = path_dict[0]
        version = path_dict[1]
        path = '/'.join(path_dict[2:])

        if version == "latest":  # we search the latest available version
            return self._handle_latest_version(request.endpoint,
                                               package, path)

        versions = self.handle_versions(version, package, path)
        if versions:
            redirect_url_parts = [package, versions[-1]]
            if path:
                redirect_url_parts.append(path)
            redirect_url = '/'.join(redirect_url_parts)
            return self._redirect_to_url(request.endpoint,
                                         redirect_url, redirect_code=302)

        try:
            sources_path = helper.get_sources_path(session, package, version,
                                                   current_app.config)
        except FileOrFolderNotFound:
            raise Http404ErrorSuggestions(package, version,
                                          'debian/copyright')
        except InvalidPackageOrVersionError:
            raise Http404ErrorSuggestions(package, version, '')

        try:
            c = helper.parse_license(sources_path)
        except Exception:
            # Non machine readable license: there is nothing to export.
            # The make_response rendering path reads the 'spdx' and 'header'
            # keys of the returned dict, so a dict without them would fail
            # with a KeyError; report a proper 404 instead.
            raise Http404ErrorSuggestions(package, version,
                                          'debian/copyright')

        spdx = helper.export_copyright_to_spdx(
            c, session=session, package=package, version=version)
        attachment = "attachment;" + "filename=" + \
            path_to.replace('/', '_') + ".spdx"
        return dict(spdx=self._generate_file(spdx),
                    header=attachment)

debsources/app/views.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
from debian.debian_support import version_compare
2020

2121
from flask import (
22-
current_app, jsonify, render_template, request, url_for, redirect)
22+
current_app, jsonify, render_template, request, url_for, redirect,
23+
make_response)
2324
from flask.views import View
2425

2526
from debsources.excepts import (
@@ -192,6 +193,10 @@ def dispatch_request(self, **kwargs):
192193
"""
193194
try:
194195
context = self.get_objects(**kwargs)
196+
if self.render_func is make_response:
197+
response = make_response(context['spdx'])
198+
response.headers["Content-Disposition"] = context['header']
199+
return response
195200
return self.render_func(**context)
196201
except Http403Error as e:
197202
return self.err_func(e, http=403)

debsources/license_helper.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@
1212
import io
1313
import logging
1414
import re
15+
import hashlib
16+
from datetime import datetime
1517

1618
from flask import url_for
1719
from debian import copyright
1820

21+
from debsources.models import Checksum, File, Package, PackageName
1922
from debsources.navigation import Location, SourceFile
2023

2124
# import debsources.query as qry
@@ -134,6 +137,10 @@ def get_license(session, package, version, path, license_path=None):
134137
return None
135138

136139

140+
def get_paragraph(c, path):
    """ Return the result of `c.find_files_paragraph(path)`, i.e. the
        paragraph of the copyright object `c` found for `path`.
    """
    paragraph = c.find_files_paragraph(path)
    return paragraph
142+
143+
137144
def get_copyright_header(copyright):
138145
""" Return all the header attributs
139146
@@ -197,6 +204,8 @@ def create_url(glob="", base=None,):
197204
def match_license(synopsis):
198205
""" Matches a `synopsis` with a license and creates a url
199206
"""
207+
if any(keyword in synopsis for keyword in ['with', 'exception']):
208+
return None
200209
key = filter(lambda x: re.search(x, synopsis) is not None, Licenses)
201210
if len(key) is not 0:
202211
return Licenses[key[0]]
@@ -241,3 +250,160 @@ def anchor_to_license(copyright, synopsis):
241250
return '#license-' + str(licenses.index(synopsis))
242251
else:
243252
return None
253+
254+
255+
def export_copyright_to_spdx(c, package, version, session):
    """ Creates the SPDX document describing `package` / `version` from the
        parsed debian/copyright `c`.

        `c`: the parsed copyright object; its `header`,
             `all_files_paragraphs()` and `all_license_paragraphs()` are read
        `package`, `version`: used to look the files up in the DB
        `session`: the DB session used for the queries

        Returns the document as a list of strings (SPDX tag/value entries);
        nothing is written to disk.
    """

    def create_package_code(session, package, version):
        """ Returns the sha256 hex digest of the concatenation of the
            sorted sha256 checksums of the files of the package version.
        """
        sha = (session.query(Checksum.sha256.label("sha256"))
               .filter(Checksum.package_id == Package.id)
               .filter(Checksum.file_id == File.id)
               .filter(Package.name_id == PackageName.id)
               .filter(PackageName.name == package)
               .filter(Package.version == version)
               .order_by("sha256")
               ).all()
        # skip missing checksums instead of failing in the join
        sha_values = [row[0] for row in sha if row[0]]
        # hex digests are plain ASCII; hashing bytes works on any Python
        return hashlib.sha256("".join(sha_values).encode('ascii')).hexdigest()

    def create_license_ref(synopsis, count, refs, unknown):
        """ Creates the license reference of `synopsis` and adds it in the
            `refs` dictionary. Non standard licenses (those `match_license`
            does not recognise) get a `LicenseRef-<count>` id and are also
            recorded in `unknown`.

            Returns the updated `(refs, unknown, count)`.
        """
        if synopsis and synopsis not in refs:
            if not match_license(synopsis):
                l_id = 'LicenseRef-' + str(count)
                refs[synopsis] = l_id
                count += 1
                # name the license after this very synopsis, not after the
                # (possibly compound) synopsis of the paragraph it came from
                unknown[synopsis] = "LicenseID: " + l_id + \
                    "\nLicenseName: " + synopsis
            else:
                # useful in PackageLicenseInfoFromFiles
                refs[synopsis] = synopsis

        return refs, unknown, count

    # find out which are not standard and save SPDX required information
    # Non standard licenses are referenced as LicenseRef-<number>
    refs = dict()
    count = 0
    unknown = dict()
    for par in c.all_files_paragraphs():
        try:
            synopsis = par.license.synopsis
            if any(keyword in synopsis for keyword in ['and', 'or']):
                # compound synopsis: reference each license separately
                for part in re.split(', |and |or ', synopsis):
                    refs, unknown, count = create_license_ref(part.rstrip(),
                                                              count, refs,
                                                              unknown)
            else:
                refs, unknown, count = create_license_ref(synopsis, count,
                                                          refs, unknown)
        except (AttributeError, ValueError):
            # paragraph without usable license information
            pass

    # add the available extracted license text for unknown licenses
    for par in c.all_license_paragraphs():
        try:
            synopsis = par.license.synopsis
            if synopsis in refs and not match_license(synopsis):
                unknown[synopsis] = "LicenseID: " + refs[synopsis] + \
                    "\nExtractedText: <text>" + \
                    par.license.text + "</text>" + \
                    "\nLicenseName: " + synopsis
        except (AttributeError, ValueError):
            pass

    # the trailing 'Z' designates UTC, so the timestamp must be taken in UTC
    now = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')

    # the upstream name is optional in debian/copyright; fall back to the
    # Debian package name rather than failing on a missing value
    name = c.header.upstream_name or package

    # NOTE(review): the DocumentNamespace below is a fixed example value
    # shared by every generated document -- confirm whether it should be
    # unique per document.
    spdx = ["SPDXVersion: SPDX-2.0", "DataLicense: CC0-1.0",
            "SPDXID: SPDXRef-DOCUMENT",
            "Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-Package",
            "DocumentName: " + name,
            "DocumentNamespace: http://spdx.org/spdxdocs/" +
            "spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301",
            "LicenseListVersion: 2.0",
            "Creator: Person: Debsources",
            "Creator: Organization: Debsources",
            "Creator: Tool: Debsources",
            "Created: " + now,
            "CreatorComment: <text> This document was created by " +
            "Debsources by parsing the respective debian/copyright " +
            "file of the package provided by the Debian project. You " +
            "may follow these links: http://debian.org/ " +
            "http://sources.debian.net/ to get more information about " +
            "Debian and Debsources. </text>",
            "DocumentComment: <text>This document was created using " +
            "SPDX 2.0, version 2.3 of the SPDX License List.</text>",
            "PackageName: " + name,
            "SPDXID: SPDXRef-Package",
            "PackageDownloadLocation: NOASSERTION",
            "PackageVerificationCode: " + create_package_code(session,
                                                              package,
                                                              version),
            "PackageLicenseConcluded: NOASSERTION"]
    # sorted so that the generated document is deterministic
    for value in sorted(set(refs.values())):
        spdx.append("PackageLicenseInfoFromFiles: " + value)

    spdx.extend(["PackageLicenseDeclared: NOASSERTION",
                 "PackageCopyrightText: NOASSERTION"])
    for entries in get_files_spdx(refs, package, version, session, c):
        spdx.extend(str(item) for item in entries)
    spdx.extend(unknown.values())
    return spdx
363+
364+
365+
def get_files_spdx(refs, package, version, session, c):
    """ Get all files from the DB for a specific package and version and
        then create the SPDX entries of each file.

        `refs`: dictionary mapping a license synopsis to its SPDX reference
        `c`: the parsed copyright object, queried for the paragraph
             applying to each file

        Returns a list holding, for each file, the list of its SPDX
        tag/value strings.
    """

    def replace_all(text, dic):
        """ Replace all occurrences of the keys in dic by the corresponding
            value
        """
        for old, new in dic.items():
            text = text.replace(old, new)
        return text

    files = (session.query(Checksum.sha256.label("sha256"),
                           File.path.label("path"))
             .filter(Checksum.package_id == Package.id)
             .filter(Checksum.file_id == File.id)
             .filter(Package.name_id == PackageName.id)
             .filter(PackageName.name == package)
             .filter(Package.version == version)
             )

    files_info = []

    for i, f in enumerate(files.all()):
        par = get_paragraph(c, f.path)
        try:
            synopsis = par.license.synopsis
            if not match_license(synopsis):
                license_concluded = replace_all(synopsis, refs)
            else:
                license_concluded = synopsis
        except (AttributeError, ValueError):
            license_concluded = "None"
        try:
            copyright_text = "<text>" + par.copyright.encode('utf-8') + \
                "</text>"
        except AttributeError:
            # no paragraph applies to this file (or it carries no copyright
            # field): nothing can be asserted about its copyright
            copyright_text = "NOASSERTION"
        # NOASSERTION means that the SPDX generator did not calculate that
        # value.
        sha = 'NOASSERTION' if not f.sha256 else f.sha256
        files_info.append(["FileName: " + f.path,
                           "SPDXID: SPDX-FILE-REF-" + str(i),
                           "FileChecksum: SHA256: " + sha,
                           "LicenseConcluded: " + license_concluded,
                           "LicenseInfoInFile: NOASSERTION",
                           "FileCopyrightText: " + copyright_text])
    return files_info

0 commit comments

Comments
 (0)