Merged
Changes from 12 commits
@@ -39,11 +39,16 @@ def handle(self, *args, **options) -> None:
chapter.deactivate()
continue

chapter.leaders_raw = chapter.get_leaders()
leaders_emails = chapter.get_leaders_emails()
if leaders_emails:
chapter.sync_leaders(leaders_emails)

# Get related URLs.
scraped_urls = sorted(
{
repository_url
for url in set(scraper.get_urls())
for url in set(chapter.get_urls())
if (
repository_url := normalize_url(
chapter.get_related_url(
@@ -39,11 +39,16 @@ def handle(self, *args, **options) -> None:
committee.deactivate()
continue

committee.leaders_raw = committee.get_leaders()
leaders_emails = committee.get_leaders_emails()
if leaders_emails:
committee.sync_leaders(leaders_emails)

# Get related URLs.
scraped_urls = sorted(
{
repository_url
for url in set(scraper.get_urls())
for url in set(committee.get_urls())
if (
repository_url := normalize_url(
committee.get_related_url(
@@ -48,16 +48,21 @@ def handle(self, *args, **options) -> None:

scraper = OwaspScraper(project.owasp_url)
if scraper.page_tree is None:
print("what")
project.deactivate()
continue

project.audience = scraper.get_audience()
project.audience = project.get_audience()
project.leaders_raw = project.get_leaders()
leaders_emails = project.get_leaders_emails()
if leaders_emails:
project.sync_leaders(leaders_emails)

# Get GitHub URLs.
scraped_urls = sorted(
{
repository_url
for url in set(scraper.get_urls(domain="github.com"))
for url in set(project.get_urls(domain="github.com"))
if (repository_url := normalize_url(project.get_related_url(url)))
and repository_url not in {project.github_url, project.owasp_url}
}
@@ -0,0 +1,35 @@
# Generated by Django 5.2.6 on 2025-09-06 10:44

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("github", "0035_alter_user_bio_alter_user_is_owasp_staff"),
("owasp", "0050_alter_entitymember_role"),
]

operations = [
migrations.AddField(
model_name="entitymember",
name="member_email",
field=models.EmailField(blank=True, default="", max_length=254),
),
migrations.AddField(
model_name="entitymember",
name="member_name",
field=models.CharField(default="", max_length=255),
),
migrations.AlterField(
model_name="entitymember",
name="member",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="+",
to="github.user",
),
),
]
102 changes: 102 additions & 0 deletions backend/apps/owasp/models/common.py
@@ -8,8 +8,10 @@
from urllib.parse import urlparse

import yaml
from django.contrib.contenttypes.models import ContentType
from django.db import models

from apps.common.models import BulkSaveModel
from apps.common.open_ai import OpenAi
from apps.github.constants import (
GITHUB_REPOSITORY_RE,
@@ -18,6 +20,7 @@
from apps.github.models.user import User
from apps.github.utils import get_repository_file_content
from apps.owasp.models.entity_member import EntityMember
from apps.owasp.models.enums.project import AudienceChoices

logger = logging.getLogger(__name__)

@@ -101,6 +104,16 @@ def index_md_url(self) -> str | None:
else None
)

@property
def info_md_url(self) -> str | None:
"""Return entity's raw info.md GitHub URL."""
return (
"https://raw.githubusercontent.com/OWASP/"
f"{self.owasp_repository.key}/{self.owasp_repository.default_branch}/info.md"
if self.owasp_repository
else None
)

@property
def entity_leaders(self) -> models.QuerySet[User]:
"""Return entity's leaders."""
@@ -160,6 +173,21 @@ def generate_summary(self, prompt, open_ai=None, max_tokens=500):
open_ai.set_max_tokens(max_tokens).set_prompt(prompt)
self.summary = open_ai.complete() or ""

def get_audience(self):
"""Get audience from info.md file on GitHub."""
content = get_repository_file_content(self.info_md_url)
if not content:
return []

found_keywords = set()

for line in content.split("\n"):
for lower_kw, original_kw in AudienceChoices.choices:
if original_kw in line:
found_keywords.add(lower_kw)

return sorted(found_keywords)
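
(Editor's aside, not part of the diff: get_audience() relies on AudienceChoices pairing a stored value with a label that is expected to appear verbatim in info.md. The enum itself is not shown in this PR; judging from the values exercised in the project tests further down ("builder", "breaker", "defender"), it presumably looks roughly like the sketch below. Member names and labels here are assumptions; the real definition lives in apps.owasp.models.enums.project.)

from django.db import models


class AudienceChoices(models.TextChoices):
    # Assumed members, inferred from the test data; the actual enum may differ.
    BUILDER = "builder", "Builder"
    BREAKER = "breaker", "Breaker"
    DEFENDER = "defender", "Defender"

(With choices shaped like this, the loop above matches the label, e.g. "Builder", in each info.md line and records the stored value, e.g. "builder".)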

def get_leaders(self):
"""Get leaders from leaders.md file on GitHub."""
content = get_repository_file_content(self.leaders_md_url)
@@ -182,6 +210,26 @@ def get_leaders(self):

return leaders

def get_leaders_emails(self):
"""Get leaders emails from leaders.md file on GitHub."""
content = get_repository_file_content(self.leaders_md_url)
if not content:
return {}

leaders = {}
for line in content.split("\n"):
    stripped = line.strip()

    # Leader with an email, e.g. `* [Jane Doe](mailto:jane@example.com)`.
    email_match = re.match(r"^[-*]\s*\[([^\]]+)\]\(mailto:([^)]+)\)", stripped)
    if email_match:
        leaders[email_match[1].strip()] = email_match[2].strip()
        continue

    # Leader without an email, e.g. `* [Jane Doe](https://github.com/jane)` or `* Jane Doe`.
    name_match = re.match(r"^[-*]\s*\[?([^\](<\n]+)", stripped)
    if name_match:
        leaders[name_match[1].strip()] = None

return leaders

def get_metadata(self):
"""Get entity metadata."""
try:
@@ -224,6 +272,19 @@ def get_related_url(self, url, exclude_domains=(), include_domains=()) -> str |

return url

def get_urls(self, domain=None):
"""Get URLs from info.md file on GitHub."""
content = get_repository_file_content(self.info_md_url)
if not content:
return []

urls = re.findall(r"https?://[^\s)]+", content.strip())

if domain:
return [url for url in urls if urlparse(url).netloc == domain]
[Review comment from the PR author] should we also match subdomains?

return urls
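
(Editor's aside on the review question above: if subdomains such as www.github.com or gist.github.com should also count for domain="github.com", a minimal tweak — assuming suffix matching is the desired behaviour, which this PR does not decide — could look like this sketch:)

if domain:
    return [
        url
        for url in urls
        if (netloc := urlparse(url).netloc) == domain
        or netloc.endswith(f".{domain}")
    ]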

def parse_tags(self, tags) -> list[str]:
"""Parse entity tags."""
if not tags:
@@ -234,3 +295,44 @@
if isinstance(tags, str)
else tags
)

def sync_leaders(self, leaders_emails):
"""Sync Leaders data.

Args:
leaders_emails (dict[str, str | None]): A dictionary
where keys are the full names of the leaders
and values are their corresponding email addresses (or None if no email is provided).

"""
content_type = ContentType.objects.get_for_model(self.__class__)
existing_leaders = {
leader.member_name: leader
for leader in EntityMember.objects.filter(
entity_type=content_type, entity_id=self.id, role=EntityMember.Role.LEADER
)
}

leaders = []
for order, (name, email) in enumerate(leaders_emails.items()):
if name in existing_leaders:
leader = existing_leaders[name]
if leader.member_email != (email or ""):
leader.member_email = email or ""
leaders.append(leader)
else:
leaders.append(
EntityMember(
entity_type=content_type,
entity_id=self.id,
member_name=name,
member_email=email or "",
role=EntityMember.Role.LEADER,
order=order,
is_active=True,
is_reviewed=False,
)
)

if leaders:
BulkSaveModel.bulk_save(EntityMember, leaders, ["member_email"])
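
(Editor's aside: a quick usage sketch tying the new pieces together — get_leaders_emails() produces the dict that sync_leaders() consumes, so the scrape commands above end up doing roughly the following; the names and email are made up for illustration:)

leaders_emails = project.get_leaders_emails()
# e.g. {"Jane Doe": "jane.doe@example.com", "John Smith": None}
if leaders_emails:
    # Creates or updates EntityMember rows with role=LEADER for this entity.
    project.sync_leaders(leaders_emails)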
7 changes: 6 additions & 1 deletion backend/apps/owasp/models/entity_member.py
@@ -49,9 +49,13 @@ class Meta:
)
member = models.ForeignKey(
User,
blank=True,
null=True,
on_delete=models.CASCADE,
related_name="+",
)
member_email = models.EmailField(blank=True, default="")
member_name = models.CharField(default="", max_length=255)
order = models.PositiveSmallIntegerField(
default=0,
help_text="Display order/priority of members",
@@ -64,4 +68,5 @@

def __str__(self):
"""EntityMember human readable representation."""
return f"{self.member.login} as {self.get_role_display()} for {self.entity}"
display_name = self.member.login if self.member else self.member_name
return f"{display_name} as {self.get_role_display()} for {self.entity}"
@@ -38,7 +38,7 @@ def mock_chapter(self):
@mock.patch.object(Chapter, "bulk_save", autospec=True)
def test_handle(self, mock_bulk_save, command, mock_chapter, offset, chapters):
mock_scraper = mock.Mock(spec=OwaspScraper)
mock_scraper.get_urls.return_value = [
mock_chapter.get_urls.return_value = [
"https://example.com/repo1",
"https://example.com/repo2",
"https://invalid.com/repo3",
@@ -38,7 +38,7 @@ def mock_committee(self):
@mock.patch.object(Committee, "bulk_save", autospec=True)
def test_handle(self, mock_bulk_save, command, mock_committee, offset, committees):
mock_scraper = mock.Mock(spec=OwaspScraper)
mock_scraper.get_urls.return_value = [
mock_committee.get_urls.return_value = [
"https://example.com/repo1",
"https://example.com/repo2",
"https://invalid.com/repo3",
@@ -33,8 +33,8 @@ def test_audience(self, mock_github, mock_bulk_save, command, mock_project):
"""Test audience validation logic."""
mock_scraper = mock.Mock(spec=OwaspScraper)
mock_scraper.page_tree = True
mock_scraper.get_urls.return_value = []
mock_scraper.get_audience.return_value = ["builder", "breaker", "defender"]
mock_project.get_urls.return_value = []
mock_project.get_audience.return_value = ["builder", "breaker", "defender"]

mock_active_projects = mock.MagicMock()
mock_active_projects.__iter__.return_value = iter([mock_project])
@@ -71,12 +71,12 @@ def test_audience(self, mock_github, mock_bulk_save, command, mock_project):
def test_urls(self, mock_github, mock_bulk_save, command, mock_project, offset, project_count):
"""Tests the existing URL scraping logic, ensuring it still passes."""
mock_scraper = mock.Mock(spec=OwaspScraper)
mock_scraper.get_urls.return_value = [
mock_project.get_urls.return_value = [
"https://github.com/org/repo1",
"https://github.com/org/repo2",
"https://invalid.com/repo3",
]
mock_scraper.get_audience.return_value = []
mock_project.get_audience.return_value = []
mock_scraper.verify_url.side_effect = lambda url: None if "invalid" in url else url
mock_scraper.page_tree = True
