|
12 | 12 | import io
|
13 | 13 | import logging
|
14 | 14 | import re
|
| 15 | +import hashlib |
| 16 | +from datetime import datetime |
15 | 17 |
|
16 | 18 | from flask import url_for
|
17 | 19 | from debian import copyright
|
18 | 20 |
|
| 21 | +from debsources.models import Checksum, File, Package, PackageName |
19 | 22 | from debsources.navigation import Location, SourceFile
|
20 | 23 |
|
21 | 24 | # import debsources.query as qry
|
@@ -134,6 +137,10 @@ def get_license(session, package, version, path, license_path=None):
|
134 | 137 | return None
|
135 | 138 |
|
136 | 139 |
|
def get_paragraph(c, path):
    """ Look up the Files paragraph of copyright object `c` that applies
    to `path`, by delegating to `c.find_files_paragraph`.

    Whatever that lookup returns is handed back unchanged.
    """
    paragraph = c.find_files_paragraph(path)
    return paragraph
| 142 | + |
| 143 | + |
137 | 144 | def get_copyright_header(copyright):
|
138 | 145 | """ Return all the header attributs
|
139 | 146 |
|
@@ -197,6 +204,8 @@ def create_url(glob="", base=None,):
|
197 | 204 | def match_license(synopsis):
|
198 | 205 | """ Matches a `synopsis` with a license and creates a url
|
199 | 206 | """
|
| 207 | + if any(keyword in synopsis for keyword in ['with', 'exception']): |
| 208 | + return None |
200 | 209 | key = filter(lambda x: re.search(x, synopsis) is not None, Licenses)
|
201 | 210 | if len(key) is not 0:
|
202 | 211 | return Licenses[key[0]]
|
@@ -241,3 +250,157 @@ def anchor_to_license(copyright, synopsis):
|
241 | 250 | return '#license-' + str(licenses.index(synopsis))
|
242 | 251 | else:
|
243 | 252 | return None
|
| 253 | + |
| 254 | + |
def export_copyright_to_spdx(c, package, version, session):
    """ Build the SPDX 2.0 tag/value document describing a package.

    Arguments:
      c        parsed machine-readable debian/copyright object; it must
               provide `header`, `all_files_paragraphs()` and
               `all_license_paragraphs()`
      package  name of the source package, used for the DB lookups
      version  version of the source package, used for the DB lookups
      session  DB session used to query the checksums and files

    Returns the document as a list of strings, one per tag/value entry
    (nothing is written to disk here).
    """

    # Separators between license names in a synopsis such as
    # "GPL-2+ or Artistic, and BSD". The surrounding whitespace is required
    # so that names merely containing "and"/"or" are not cut in two.
    separators = re.compile(r',\s+(?:(?:and|or)\s+)?|\s+(?:and|or)\s+')

    def create_package_code(session, package, version):
        """ Hash the sorted SHA256 checksums of the package's files into a
        single hexadecimal verification code.
        """
        rows = (session.query(Checksum.sha256.label("sha256"))
                .filter(Checksum.package_id == Package.id)
                .filter(Checksum.file_id == File.id)
                .filter(Package.name_id == PackageName.id)
                .filter(PackageName.name == package)
                .filter(Package.version == version)
                .order_by("sha256")
                ).all()
        # Files without a checksum are skipped instead of breaking the join.
        sha_values = [row[0] for row in rows if row[0]]
        # NOTE(review): the SPDX specification describes the verification
        # code in terms of SHA1 values; SHA256 is kept here as in the
        # original implementation -- confirm this is intended.
        joined = "".join(sha_values).encode('utf-8')
        return hashlib.sha256(joined).hexdigest()

    def create_license_ref(name, count, refs, unknown):
        """ Register `name` as a LicenseRef when it is a non empty, not
        yet seen and non standard license. Returns the updated
        (refs, unknown, count) triple.
        """
        if name and name not in refs and not match_license(name):
            l_id = 'LicenseRef-' + str(count)
            refs[name] = l_id
            count += 1
            unknown[name] = "LicenseID: " + l_id + \
                "\nLicenseName: " + name
        return refs, unknown, count

    # Find out which licenses are not standard and save the information
    # required by SPDX. Non standard licenses are referenced as
    # LicenseRef-<number>.
    refs = dict()
    count = 0
    unknown = dict()
    for par in c.all_files_paragraphs():
        try:
            synopsis = par.license.synopsis
            for name in separators.split(synopsis):
                refs, unknown, count = create_license_ref(name.strip(),
                                                          count, refs,
                                                          unknown)
        except (AttributeError, ValueError):
            # best effort: a paragraph without usable license information
            # simply contributes no reference
            pass

    # add the available extracted license text for unknown licenses
    for par in c.all_license_paragraphs():
        try:
            synopsis = par.license.synopsis
            if synopsis in refs and not match_license(synopsis):
                unknown[synopsis] = "LicenseID: " + refs[synopsis] + \
                    "\nExtractedText: <text>" + \
                    par.license.text + "</text>" + \
                    "\nLicenseName: " + synopsis
        except (AttributeError, ValueError):
            pass

    # The trailing 'Z' designates UTC, so the timestamp must be taken in UTC.
    now = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')

    # Fall back on the Debian package name when upstream did not declare one.
    name = c.header.upstream_name or package

    # NOTE(review): the DocumentNamespace below is a fixed example value
    # shared by every generated document.
    spdx = ["SPDXVersion: SPDX-2.0", "DataLicense: CC0-1.0",
            "SPDXID: SPDXRef-DOCUMENT",
            "Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-Package",
            "DocumentName: " + name,
            "DocumentNamespace: http://spdx.org/spdxdocs/" +
            "spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301",
            "LicenseListVersion: 2.0",
            "Creator: Person: Debsources",
            "Creator: Organization: Debsources",
            "Creator: Tool: Debsources",
            "Created: " + now,
            "CreatorComment: <text> This document was created by " +
            "Debsources by parsing the respective debian/copyright " +
            "file of the package provided by the Debian project. You " +
            "may follow these links: http://debian.org/ " +
            "http://sources.debian.net/ to get more information about " +
            "Debian and Debsources. </text>",
            "DocumentComment: <text>This document was created using " +
            "SPDX 2.0, version 2.3 of the SPDX License List.</text>",
            "PackageName: " + name,
            "SPDXID: SPDXRef-Package",
            "PackageDownloadLocation: NOASSERTION",
            "PackageVerificationCode: " + create_package_code(session,
                                                              package,
                                                              version),
            "PackageLicenseConcluded: NOASSERTION"]
    # sorted only to make the output order reproducible
    for value in sorted(set(refs.values())):
        spdx.append("PackageLicenseInfoFromFiles: " + value)

    spdx.extend(["PackageLicenseDeclared: NOASSERTION",
                 "PackageCopyrightText: NOASSERTION"])
    for files in get_files_spdx(refs, package, version, session, c):
        for item in files:
            spdx.append(str(item))
    for entry in unknown.values():
        spdx.append(entry)
    return spdx
| 360 | + |
| 361 | + |
def get_files_spdx(refs, package, version, session, c):
    """ Get all files from the DB for a specific package and version and
    create the SPDX entries of each of them.

    Arguments:
      refs     dictionary mapping non standard license names to their
               LicenseRef-<number> identifier
      package  name of the source package
      version  version of the source package
      session  DB session used to query the files and their checksums
      c        parsed debian/copyright object, used to find the paragraph
               applying to each file

    Returns a list holding, for every file, the list of its SPDX
    tag/value strings.
    """

    def replace_all(text, dic):
        """ Replace all occurrences of the keys in dic by the corresponding
        value
        """
        # Longest keys first: a short license name that is a substring of
        # a longer one must not clobber the longer name before it gets
        # replaced itself.
        for key in sorted(dic, key=len, reverse=True):
            text = text.replace(key, dic[key])
        return text

    files = (session.query(Checksum.sha256.label("sha256"),
                           File.path.label("path"))
             .filter(Checksum.package_id == Package.id)
             .filter(Checksum.file_id == File.id)
             .filter(Package.name_id == PackageName.id)
             .filter(PackageName.name == package)
             .filter(Package.version == version)
             )

    files_info = []

    for i, f in enumerate(files.all()):
        par = get_paragraph(c, f.path)
        try:
            synopsis = par.license.synopsis
            if not match_license(synopsis):
                license_concluded = replace_all(synopsis, refs)
            else:
                license_concluded = synopsis
        except (AttributeError, ValueError):
            # no paragraph applies to the file, or it carries no license
            # NOTE(review): SPDX spells these values NONE / NOASSERTION;
            # the original "None" is kept -- confirm which one is wanted.
            license_concluded = "None"
        try:
            copyright_text = "<text>" + \
                par.copyright.encode('utf-8') + "</text>"
        except AttributeError:
            # no paragraph (par is None) or no copyright field: a file
            # not covered by debian/copyright must not abort the export
            copyright_text = "NOASSERTION"
        # NOASSERTION means that the SPDX generator did not calculate that
        # value.
        sha = 'NOASSERTION' if not f.sha256 else f.sha256
        # NOTE(review): SPDX identifiers are expected to start with
        # "SPDXRef-"; the original prefix is kept to not change the output.
        files_info.append(["FileName: " + f.path,
                           "SPDXID: SPDX-FILE-REF-" + str(i),
                           "FileChecksum: SHA256: " + sha,
                           "LicenseConcluded: " + license_concluded,
                           "LicenseInfoInFile: NOASSERTION",
                           "FileCopyrightText: " + copyright_text])
    return files_info
0 commit comments