Skip to content

Commit ffd6ae7

Browse files
committed
Add route, view and helpers for exporting d/copyright to spdx
1 parent eda9ff9 commit ffd6ae7

File tree

5 files changed

+231
-3
lines changed

5 files changed

+231
-3
lines changed

debsources/app/copyright/routes.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@
1212
from __future__ import absolute_import
1313

1414

15-
from flask import jsonify
15+
from flask import jsonify, make_response
1616

1717
from ..helper import bind_render
1818
from . import bp_copyright
1919
from ..views import (IndexView, PrefixView, ListPackagesView, ErrorHandler,
2020
Ping, PackageVersionsView, DocView, AboutView, SearchView)
21-
from .views import LicenseView, ChecksumLicenseView, SearchFileView, StatsView
21+
from .views import (LicenseView, ChecksumLicenseView, SearchFileView,
22+
StatsView, SPDXView)
2223

2324

2425
# context vars
@@ -254,3 +255,11 @@ def skeleton_variables():
254255
render_func=jsonify,
255256
err_func=ErrorHandler(mode='json'),
256257
get_objects='stats_suite'))
258+
259+
# SPDX view
260+
bp_copyright.add_url_rule(
261+
'/spdx/<path:path_to>/',
262+
view_func=SPDXView.as_view(
263+
'spdx',
264+
render_func=make_response,
265+
err_func=ErrorHandler(mode='json')))

debsources/app/copyright/templates/copyright/license.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ <h2>{{ self.title() }} / {{ version }}</h2>
2727
{% if dump == 'True' %}
2828
{% include "source_file_code.inc.html" %}
2929
{% else %}
30+
<div class="warning"><a href="{{url_for('.spdx', path_to=package + '/' + version) }}">Export to SPDX</a></div>
3031
{% include "copyright/license_render.inc.html" %}
3132
{% endif %}
3233
{% endblock %}

debsources/app/copyright/views.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,53 @@ def get_stats(self):
339339
dual_results=dual_res,
340340
dual_licenses=sorted(dual_licenses),
341341
suites=all_suites)
342+
343+
344+
class SPDXView(GeneralView):
    """ View exporting the debian/copyright of a package version as SPDX.

    On success `get_objects` returns a dict with the SPDX document text
    under `spdx` and the Content-Disposition header value under `header`.
    """

    def _generate_file(self, spdx_values):
        """ Concatenate the SPDX entries into a single text.

        Every entry is decoded from UTF-8 and terminated by a newline.
        """
        return ''.join(value.decode('utf-8') + '\n'
                       for value in spdx_values)

    def get_objects(self, path_to):
        """ Resolve `path_to` (package/version[/path]) and build the SPDX
        export of the corresponding debian/copyright file.

        Raises Http404ErrorSuggestions when the URL carries no version,
        when the package, version or copyright file cannot be found, or
        when the copyright file cannot be parsed.
        """
        path_dict = path_to.split('/')

        if len(path_dict) < 2:
            # Only a package name was given: there is no version to look
            # up, so answer 404 instead of failing on path_dict[1].
            raise Http404ErrorSuggestions(path_dict[0], '', '')

        package = path_dict[0]
        version = path_dict[1]
        path = '/'.join(path_dict[2:])

        if version == "latest":  # we search the latest available version
            return self._handle_latest_version(request.endpoint,
                                               package, path)

        versions = self.handle_versions(version, package, path)
        if versions:
            redirect_url_parts = [package, versions[-1]]
            if path:
                redirect_url_parts.append(path)
            redirect_url = '/'.join(redirect_url_parts)
            return self._redirect_to_url(request.endpoint,
                                         redirect_url, redirect_code=302)

        try:
            sources_path = helper.get_sources_path(session, package, version,
                                                   current_app.config)
        except FileOrFolderNotFound:
            raise Http404ErrorSuggestions(package, version,
                                          'debian/copyright')
        except InvalidPackageOrVersionError:
            raise Http404ErrorSuggestions(package, version, '')

        try:
            c = helper.parse_license(sources_path)
        except Exception:
            # Non machine readable license. A plain dict(return_code=404)
            # cannot be used here: the make_response rendering path reads
            # context['spdx'] and would fail with a KeyError.
            raise Http404ErrorSuggestions(package, version,
                                          'debian/copyright')

        spdx = helper.export_copyright_to_spdx(
            c, session=session, package=package, version=version)
        attachment = "attachment;" + "filename=" + \
            path_to.replace('/', '_') + ".spdx"
        return dict(spdx=self._generate_file(spdx),
                    header=attachment)

debsources/app/views.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
from debian.debian_support import version_compare
2020

2121
from flask import (
22-
current_app, jsonify, render_template, request, url_for, redirect)
22+
current_app, jsonify, render_template, request, url_for, redirect,
23+
make_response)
2324
from flask.views import View
2425

2526
from debsources.excepts import (
@@ -192,6 +193,10 @@ def dispatch_request(self, **kwargs):
192193
"""
193194
try:
194195
context = self.get_objects(**kwargs)
196+
if self.render_func is make_response:
197+
response = make_response(context['spdx'])
198+
response.headers["Content-Disposition"] = context['header']
199+
return response
195200
return self.render_func(**context)
196201
except Http403Error as e:
197202
return self.err_func(e, http=403)

debsources/license_helper.py

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@
1212
import io
1313
import logging
1414
import re
15+
import hashlib
16+
from datetime import datetime
1517

1618
from flask import url_for
1719
from debian import copyright
1820

21+
from debsources.models import Checksum, File, Package, PackageName
1922
from debsources.navigation import Location, SourceFile
2023

2124
# import debsources.query as qry
@@ -134,6 +137,10 @@ def get_license(session, package, version, path, license_path=None):
134137
return None
135138

136139

140+
def get_paragraph(c, path):
    """ Return what `c.find_files_paragraph` reports for `path`.

    """
    paragraph = c.find_files_paragraph(path)
    return paragraph
142+
143+
137144
def get_copyright_header(copyright):
138145
""" Return all the header attributs
139146
@@ -197,6 +204,8 @@ def create_url(glob="", base=None,):
197204
def match_license(synopsis):
198205
""" Matches a `synopsis` with a license and creates a url
199206
"""
207+
if any(keyword in synopsis for keyword in ['with', 'exception']):
208+
return None
200209
key = filter(lambda x: re.search(x, synopsis) is not None, Licenses)
201210
if len(key) is not 0:
202211
return Licenses[key[0]]
@@ -241,3 +250,157 @@ def anchor_to_license(copyright, synopsis):
241250
return '#license-' + str(licenses.index(synopsis))
242251
else:
243252
return None
253+
254+
255+
def export_copyright_to_spdx(c, package, version, session):
    """ Creates the SPDX document describing a parsed debian/copyright.

    `c` is the parsed copyright object (its `all_files_paragraphs()`,
    `all_license_paragraphs()` and `header.upstream_name` are used),
    `package` and `version` select the checksums and files queried through
    `session`.

    Returns the document as a list of strings, one SPDX entry per item.
    """

    def create_package_code(session, package, version):
        """ SHA256 over the concatenated, sorted SHA256 checksums stored
        for the given package and version.
        """
        sha = (session.query(Checksum.sha256.label("sha256"))
               .filter(Checksum.package_id == Package.id)
               .filter(Checksum.file_id == File.id)
               .filter(Package.name_id == PackageName.id)
               .filter(PackageName.name == package)
               .filter(Package.version == version)
               .order_by("sha256")
               ).all()
        sha_values = [sha256[0] for sha256 in sha]
        return hashlib.sha256("".join(sha_values)).hexdigest()

    def create_license_ref(license, count, refs, unknown):
        """ Creates license references and adds it in the specific
        dictionary. Also adds the non standard licenses in unknown
        licenses.
        """
        # Skip empty names and licenses that already have a reference.
        if license and license not in refs:
            if not match_license(license):
                l_id = 'LicenseRef-' + str(count)
                refs[license] = l_id
                count += 1
                # Name the entry after this very license, not after the
                # whole synopsis it may have been split from.
                unknown[license] = "LicenseID: " + l_id + \
                    "\nLicenseName: " + license

        return refs, unknown, count

    # find out which are not standard and save SPDX required information
    # Non standard licenses are referenced as LicenseRef-<number>
    refs = dict()
    count = 0
    unknown = dict()
    for par in c.all_files_paragraphs():
        try:
            synopsis = par.license.synopsis
            if any(keyword in synopsis for keyword in ['and', 'or']):
                licenses = re.split(', |and |or ', synopsis)
                for license in licenses:
                    refs, unknown, count = create_license_ref(
                        license.rstrip(), count, refs, unknown)
            else:
                refs, unknown, count = create_license_ref(synopsis, count,
                                                          refs, unknown)
        except (AttributeError, ValueError):
            # best effort: paragraphs without a usable license are skipped
            pass

    # add the available extracted license text for unknown licenses
    for par in c.all_license_paragraphs():
        try:
            synopsis = par.license.synopsis
            # refs only holds licenses that match_license did not recognise
            if synopsis in refs:
                unknown[synopsis] = "LicenseID: " + refs[synopsis] + \
                    "\nExtractedText: <text>" + \
                    par.license.text + "</text>" + \
                    "\nLicenseName: " + synopsis
        except (AttributeError, ValueError):
            pass

    # The trailing 'Z' denotes UTC, so the timestamp must be taken in UTC.
    now = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')

    # Fall back on the package name when the header has no upstream name.
    name = c.header.upstream_name or package

    # NOTE(review): DocumentNamespace is a fixed example value; SPDX expects
    # a namespace unique to each document -- confirm what should be used.
    spdx = ["SPDXVersion: SPDX-2.0", "DataLicense: CC0-1.0",
            "SPDXID: SPDXRef-DOCUMENT",
            "Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-Package",
            "DocumentName: " + name,
            "DocumentNamespace: http://spdx.org/spdxdocs/" +
            "spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301",
            "LicenseListVersion: 2.0",
            "Creator: Person: Debsources",
            "Creator: Organization: Debsources",
            "Creator: Tool: Debsources",
            "Created: " + now,
            "CreatorComment: <text> This document was created by " +
            "Debsources by parsing the respective debian/copyright " +
            "file of the package provided by the Debian project. You " +
            "may follow these links: http://debian.org/ " +
            "http://sources.debian.net/ to get more information about " +
            "Debian and Debsources. </text>",
            "DocumentComment: <text>This document was created using " +
            "SPDX 2.0, version 2.3 of the SPDX License List.</text>",
            "PackageName: " + name,
            "SPDXID: SPDXRef-Package",
            "PackageDownloadLocation: NOASSERTION",
            "PackageVerificationCode: " + create_package_code(session,
                                                              package,
                                                              version),
            "PackageLicenseConcluded: NOASSERTION"]
    # sorted so that the generated document is stable between requests
    for value in sorted(set(refs.values())):
        spdx.append("PackageLicenseInfoFromFiles: " + value)

    spdx.extend(["PackageLicenseDeclared: NOASSERTION",
                 "PackageCopyrightText: NOASSERTION"])
    for files in get_files_spdx(refs, package, version, session, c):
        for item in files:
            spdx.append(str(item))
    spdx.extend(unknown.values())
    return spdx
360+
361+
362+
def get_files_spdx(refs, package, version, session, c):
    """ Get all files from the DB for a specific package and version and
    then create the SPDX entries of each file.

    `refs` maps non standard license names to their LicenseRef-<number>
    identifier and `c` is the parsed copyright used to look up the
    paragraph covering each file.

    Returns a list with, for every file, the list of its SPDX entries.
    """

    def replace_all(text, dic):
        """ Replace all occurrences of the keys in dic by the corresponding
        value
        """
        # Longest keys first, so that a license name which is a substring
        # of another one cannot corrupt the longer name.
        for key in sorted(dic, key=len, reverse=True):
            text = text.replace(key, dic[key])
        return text

    files = (session.query(Checksum.sha256.label("sha256"),
                           File.path.label("path"))
             .filter(Checksum.package_id == Package.id)
             .filter(Checksum.file_id == File.id)
             .filter(Package.name_id == PackageName.id)
             .filter(PackageName.name == package)
             .filter(Package.version == version)
             )

    files_info = []

    for i, f in enumerate(files.all()):
        par = get_paragraph(c, f.path)

        # NOASSERTION means that the SPDX generator did not calculate that
        # value.
        try:
            synopsis = par.license.synopsis
            if match_license(synopsis):
                license_concluded = synopsis
            else:
                license_concluded = replace_all(synopsis, refs)
        except (AttributeError, ValueError):
            # no paragraph covers the file, or it carries no license
            license_concluded = "NOASSERTION"

        try:
            copyright_text = "<text>" + \
                par.copyright.encode('utf-8') + "</text>"
        except AttributeError:
            # no paragraph covers the file, or it has no copyright field
            copyright_text = "NOASSERTION"

        sha = f.sha256 or 'NOASSERTION'
        # SPDX identifiers have to start with "SPDXRef-"
        files_info.append(["FileName: " + f.path,
                           "SPDXID: SPDXRef-File-" + str(i),
                           "FileChecksum: SHA256: " + sha,
                           "LicenseConcluded: " + license_concluded,
                           "LicenseInfoInFile: NOASSERTION",
                           "FileCopyrightText: " + copyright_text])
    return files_info

0 commit comments

Comments
 (0)