diff --git a/backend/apps/owasp/admin/entity_member.py b/backend/apps/owasp/admin/entity_member.py index 33202a94a1..1aef460fbb 100644 --- a/backend/apps/owasp/admin/entity_member.py +++ b/backend/apps/owasp/admin/entity_member.py @@ -17,17 +17,8 @@ class EntityMemberAdmin(admin.ModelAdmin): actions = ("approve_members",) autocomplete_fields = ("member",) - fields = ( - "entity_type", - "entity_id", - "member", - "role", - "order", - "is_active", - "is_reviewed", - "description", - ) list_display = ( + "member_name", "member", "entity", "owasp_url", diff --git a/backend/apps/owasp/management/commands/owasp_scrape_chapters.py b/backend/apps/owasp/management/commands/owasp_scrape_chapters.py index a744340af8..6af1050eb5 100644 --- a/backend/apps/owasp/management/commands/owasp_scrape_chapters.py +++ b/backend/apps/owasp/management/commands/owasp_scrape_chapters.py @@ -39,11 +39,15 @@ def handle(self, *args, **options) -> None: chapter.deactivate() continue + chapter.leaders_raw = chapter.get_leaders() + if leaders_emails := chapter.get_leaders_emails(): + chapter.sync_leaders(leaders_emails) + # Get related URLs. scraped_urls = sorted( { repository_url - for url in set(scraper.get_urls()) + for url in set(chapter.get_urls()) if ( repository_url := normalize_url( chapter.get_related_url( diff --git a/backend/apps/owasp/management/commands/owasp_scrape_committees.py b/backend/apps/owasp/management/commands/owasp_scrape_committees.py index cc472ea7fd..09d5bcfead 100644 --- a/backend/apps/owasp/management/commands/owasp_scrape_committees.py +++ b/backend/apps/owasp/management/commands/owasp_scrape_committees.py @@ -39,11 +39,15 @@ def handle(self, *args, **options) -> None: committee.deactivate() continue + committee.leaders_raw = committee.get_leaders() + if leaders_emails := committee.get_leaders_emails(): + committee.sync_leaders(leaders_emails) + # Get related URLs. scraped_urls = sorted( { repository_url - for url in set(scraper.get_urls()) + for url in set(committee.get_urls()) if ( repository_url := normalize_url( committee.get_related_url( diff --git a/backend/apps/owasp/management/commands/owasp_scrape_projects.py b/backend/apps/owasp/management/commands/owasp_scrape_projects.py index 207f37eb40..c66e115685 100644 --- a/backend/apps/owasp/management/commands/owasp_scrape_projects.py +++ b/backend/apps/owasp/management/commands/owasp_scrape_projects.py @@ -51,13 +51,16 @@ def handle(self, *args, **options) -> None: project.deactivate() continue - project.audience = scraper.get_audience() + project.audience = project.get_audience() + project.leaders_raw = project.get_leaders() + if leaders_emails := project.get_leaders_emails(): + project.sync_leaders(leaders_emails) # Get GitHub URLs. scraped_urls = sorted( { repository_url - for url in set(scraper.get_urls(domain="github.com")) + for url in set(project.get_urls(domain="github.com")) if (repository_url := normalize_url(project.get_related_url(url))) and repository_url not in {project.github_url, project.owasp_url} } diff --git a/backend/apps/owasp/migrations/0051_entitymember_member_email_entitymember_member_name_and_more.py b/backend/apps/owasp/migrations/0051_entitymember_member_email_entitymember_member_name_and_more.py new file mode 100644 index 0000000000..24fc79e789 --- /dev/null +++ b/backend/apps/owasp/migrations/0051_entitymember_member_email_entitymember_member_name_and_more.py @@ -0,0 +1,35 @@ +# Generated by Django 5.2.6 on 2025-09-06 10:44 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("github", "0035_alter_user_bio_alter_user_is_owasp_staff"), + ("owasp", "0050_alter_entitymember_role"), + ] + + operations = [ + migrations.AddField( + model_name="entitymember", + name="member_email", + field=models.EmailField(blank=True, default="", max_length=254), + ), + migrations.AddField( + model_name="entitymember", + name="member_name", + field=models.CharField(default="", max_length=255), + ), + migrations.AlterField( + model_name="entitymember", + name="member", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="+", + to="github.user", + ), + ), + ] diff --git a/backend/apps/owasp/migrations/0052_remove_entitymember_owasp_entit_member__6e516f_idx_and_more.py b/backend/apps/owasp/migrations/0052_remove_entitymember_owasp_entit_member__6e516f_idx_and_more.py new file mode 100644 index 0000000000..18ccb78d25 --- /dev/null +++ b/backend/apps/owasp/migrations/0052_remove_entitymember_owasp_entit_member__6e516f_idx_and_more.py @@ -0,0 +1,30 @@ +# Generated by Django 5.2.6 on 2025-09-11 01:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("contenttypes", "0002_remove_content_type_name"), + ("owasp", "0051_entitymember_member_email_entitymember_member_name_and_more"), + ] + + operations = [ + migrations.RemoveIndex( + model_name="entitymember", + name="owasp_entit_member__6e516f_idx", + ), + migrations.AlterUniqueTogether( + name="entitymember", + unique_together=set(), + ), + migrations.AlterField( + model_name="entitymember", + name="member_name", + field=models.CharField(max_length=255), + ), + migrations.AlterUniqueTogether( + name="entitymember", + unique_together={("entity_type", "entity_id", "member_name", "role")}, + ), + ] diff --git a/backend/apps/owasp/models/common.py b/backend/apps/owasp/models/common.py index dc24b23837..10beeb7e66 100644 --- a/backend/apps/owasp/models/common.py +++ b/backend/apps/owasp/models/common.py @@ -8,8 +8,10 @@ from urllib.parse import urlparse import yaml +from django.contrib.contenttypes.models import ContentType from django.db import models +from apps.common.models import BulkSaveModel from apps.common.open_ai import OpenAi from apps.github.constants import ( GITHUB_REPOSITORY_RE, @@ -18,6 +20,7 @@ from apps.github.models.user import User from apps.github.utils import get_repository_file_content from apps.owasp.models.entity_member import EntityMember +from apps.owasp.models.enums.project import AudienceChoices logger = logging.getLogger(__name__) @@ -101,6 +104,16 @@ def index_md_url(self) -> str | None: else None ) + @property + def info_md_url(self) -> str | None: + """Return entity's raw info.md GitHub URL.""" + return ( + "https://raw.githubusercontent.com/OWASP/" + f"{self.owasp_repository.key}/{self.owasp_repository.default_branch}/info.md" + if self.owasp_repository + else None + ) + @property def entity_leaders(self) -> models.QuerySet[User]: """Return entity's leaders.""" @@ -160,6 +173,21 @@ def generate_summary(self, prompt, open_ai=None, max_tokens=500): open_ai.set_max_tokens(max_tokens).set_prompt(prompt) self.summary = open_ai.complete() or "" + def get_audience(self): + """Get audience from info.md file on GitHub.""" + content = get_repository_file_content(self.info_md_url) + if not content: + return [] + + found_keywords = set() + + for line in content.split("\n"): + for lower_kw, original_kw in AudienceChoices.choices: + if original_kw in line: + found_keywords.add(lower_kw) + + return sorted(found_keywords) + def get_leaders(self): """Get leaders from leaders.md file on GitHub.""" content = get_repository_file_content(self.leaders_md_url) @@ -182,6 +210,26 @@ def get_leaders(self): return leaders + def get_leaders_emails(self): + """Get leaders emails from leaders.md file on GitHub.""" + content = get_repository_file_content(self.leaders_md_url) + if not content: + return {} + + leaders = {} + for line in content.split("\n"): + matches = re.findall( + r"^[-*]\s*\[([^\]]+)\]\(mailto:([^)]+)(\)|([^[<\n]))", line.strip() + ) + + for match in matches: + if match[0] and match[1]: # Name with email + leaders[match[0].strip()] = match[1].strip() + elif match[2]: # Name without email + leaders[match[2].strip()] = None + + return leaders + def get_metadata(self): """Get entity metadata.""" try: @@ -224,6 +272,19 @@ def get_related_url(self, url, exclude_domains=(), include_domains=()) -> str | return url + def get_urls(self, domain=None): + """Get URLs from info.md file on GitHub.""" + content = get_repository_file_content(self.info_md_url) + if not content: + return [] + + urls = re.findall(r"https?:\/\/[^\s\)]+", content.strip()) + + if domain: + return [url for url in urls if urlparse(url).netloc == domain] + + return urls + def parse_tags(self, tags) -> list[str]: """Parse entity tags.""" if not tags: @@ -234,3 +295,33 @@ def parse_tags(self, tags) -> list[str]: if isinstance(tags, str) else tags ) + + def sync_leaders(self, leaders_emails): + """Sync Leaders data. + + Args: + leaders_emails (dict[str, str | None]): A dictionary + where keys are the full names of the leaders + and values are their corresponding email addresses (or None if no email is provided). + + """ + content_type = ContentType.objects.get_for_model(self.__class__) + + leaders = [] + for order, (name, email) in enumerate(leaders_emails.items()): + leaders.append( + EntityMember.update_data( + { + "entity_id": self.id, + "entity_type": content_type, + "member_email": email or "", + "member_name": name, + "order": (order + 1) * 100, + "role": EntityMember.Role.LEADER, + }, + save=False, + ) + ) + + if leaders: + BulkSaveModel.bulk_save(EntityMember, leaders) diff --git a/backend/apps/owasp/models/entity_member.py b/backend/apps/owasp/models/entity_member.py index 70ffc38cfc..151fc4ac00 100644 --- a/backend/apps/owasp/models/entity_member.py +++ b/backend/apps/owasp/models/entity_member.py @@ -20,12 +20,11 @@ class Meta: unique_together = ( "entity_type", "entity_id", - "member", + "member_name", "role", ) indexes = [ models.Index(fields=["entity_type", "entity_id"]), - models.Index(fields=["member"]), ] verbose_name_plural = "Entity members" @@ -35,10 +34,6 @@ class Meta: help_text="Optional note or role description", max_length=100, ) - entity = GenericForeignKey("entity_type", "entity_id") - entity_id = models.PositiveBigIntegerField() - entity_type = models.ForeignKey(ContentType, on_delete=models.CASCADE) - is_active = models.BooleanField( default=False, help_text="Indicates if the membership is active", @@ -47,11 +42,8 @@ class Meta: default=False, help_text="Indicates if the membership is reviewed", ) - member = models.ForeignKey( - User, - on_delete=models.CASCADE, - related_name="+", - ) + member_email = models.EmailField(blank=True, default="") + member_name = models.CharField(max_length=255) order = models.PositiveSmallIntegerField( default=0, help_text="Display order/priority of members", @@ -62,6 +54,59 @@ class Meta: default=Role.LEADER, ) + # FKs. + member = models.ForeignKey( + User, + blank=True, + null=True, + on_delete=models.CASCADE, + related_name="+", + ) + + # GFKs. + entity = GenericForeignKey("entity_type", "entity_id") + entity_id = models.PositiveBigIntegerField() + entity_type = models.ForeignKey(ContentType, on_delete=models.CASCADE) + def __str__(self): """EntityMember human readable representation.""" - return f"{self.member.login} as {self.get_role_display()} for {self.entity}" + display_name = self.member.login if self.member else self.member_name + return f"{display_name} as {self.get_role_display()} for {self.entity}" + + @staticmethod + def update_data(data, *, save: bool = True) -> "EntityMember": + """Update entity member data.""" + try: + entity_member = EntityMember.objects.get( + entity_id=data["entity_id"], + entity_type=data["entity_type"], + member_name=data["member_name"], + role=data["role"], + ) + except EntityMember.DoesNotExist: + entity_member = EntityMember( + entity_id=data["entity_id"], + entity_type=data["entity_type"], + member_name=data["member_name"], + role=data["role"], + ) + + entity_member.from_dict(data) + if save: + entity_member.save() + + return entity_member + + def from_dict(self, data) -> None: + """Update instance based on dict data.""" + fields = { + "entity_id": data["entity_id"], + "entity_type": data["entity_type"], + "member_email": data.get("member_email", ""), + "member_name": data["member_name"], + "order": data.get("order", 0), + "role": data["role"], + } + + for key, value in fields.items(): + setattr(self, key, value) diff --git a/backend/tests/apps/owasp/management/commands/owasp_scrape_chapters_test.py b/backend/tests/apps/owasp/management/commands/owasp_scrape_chapters_test.py index a0959582ce..5746ac00f1 100644 --- a/backend/tests/apps/owasp/management/commands/owasp_scrape_chapters_test.py +++ b/backend/tests/apps/owasp/management/commands/owasp_scrape_chapters_test.py @@ -38,7 +38,7 @@ def mock_chapter(self): @mock.patch.object(Chapter, "bulk_save", autospec=True) def test_handle(self, mock_bulk_save, command, mock_chapter, offset, chapters): mock_scraper = mock.Mock(spec=OwaspScraper) - mock_scraper.get_urls.return_value = [ + mock_chapter.get_urls.return_value = [ "https://example.com/repo1", "https://example.com/repo2", "https://invalid.com/repo3", diff --git a/backend/tests/apps/owasp/management/commands/owasp_scrape_committees_test.py b/backend/tests/apps/owasp/management/commands/owasp_scrape_committees_test.py index 2fd4c5b54e..b285737217 100644 --- a/backend/tests/apps/owasp/management/commands/owasp_scrape_committees_test.py +++ b/backend/tests/apps/owasp/management/commands/owasp_scrape_committees_test.py @@ -38,7 +38,7 @@ def mock_committee(self): @mock.patch.object(Committee, "bulk_save", autospec=True) def test_handle(self, mock_bulk_save, command, mock_committee, offset, committees): mock_scraper = mock.Mock(spec=OwaspScraper) - mock_scraper.get_urls.return_value = [ + mock_committee.get_urls.return_value = [ "https://example.com/repo1", "https://example.com/repo2", "https://invalid.com/repo3", diff --git a/backend/tests/apps/owasp/management/commands/owasp_scrape_projects_test.py b/backend/tests/apps/owasp/management/commands/owasp_scrape_projects_test.py index 2693ffd395..fb312413d2 100644 --- a/backend/tests/apps/owasp/management/commands/owasp_scrape_projects_test.py +++ b/backend/tests/apps/owasp/management/commands/owasp_scrape_projects_test.py @@ -33,8 +33,8 @@ def test_audience(self, mock_github, mock_bulk_save, command, mock_project): """Test audience validation logic.""" mock_scraper = mock.Mock(spec=OwaspScraper) mock_scraper.page_tree = True - mock_scraper.get_urls.return_value = [] - mock_scraper.get_audience.return_value = ["builder", "breaker", "defender"] + mock_project.get_urls.return_value = [] + mock_project.get_audience.return_value = ["builder", "breaker", "defender"] mock_active_projects = mock.MagicMock() mock_active_projects.__iter__.return_value = iter([mock_project]) @@ -71,12 +71,12 @@ def test_audience(self, mock_github, mock_bulk_save, command, mock_project): def test_urls(self, mock_github, mock_bulk_save, command, mock_project, offset, project_count): """Tests the existing URL scraping logic, ensuring it still passes.""" mock_scraper = mock.Mock(spec=OwaspScraper) - mock_scraper.get_urls.return_value = [ + mock_project.get_urls.return_value = [ "https://github.com/org/repo1", "https://github.com/org/repo2", "https://invalid.com/repo3", ] - mock_scraper.get_audience.return_value = [] + mock_project.get_audience.return_value = [] mock_scraper.verify_url.side_effect = lambda url: None if "invalid" in url else url mock_scraper.page_tree = True diff --git a/backend/tests/apps/owasp/models/common_test.py b/backend/tests/apps/owasp/models/common_test.py index 6883ea3ff0..18c504e70e 100644 --- a/backend/tests/apps/owasp/models/common_test.py +++ b/backend/tests/apps/owasp/models/common_test.py @@ -1,4 +1,4 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, Mock, patch import pytest @@ -13,6 +13,40 @@ class Meta: class TestRepositoryBasedEntityModel: + def setup_method(self): + """Set up test fixtures.""" + self.content_type = Mock() + self.model = EntityModel() + self.model.id = 1 + + @pytest.mark.parametrize( + ("content", "expected_audience"), + [ + ( + """### Top Ten Card Game Information +* [Incubator Project](#) +* [Type of Project](#) +* [Version 0.0.0](#) +* [Builder](#) +* [Breaker](#)""", + ["breaker", "builder"], + ), + ("This test contains no audience information.", []), + ("", []), + (None, []), + ], + ) + def test_get_audience(self, content, expected_audience): + model = EntityModel() + repository = Repository() + repository.name = "www-project-example" + model.owasp_repository = repository + + with patch("apps.owasp.models.common.get_repository_file_content", return_value=content): + audience = model.get_audience() + + assert audience == expected_audience + @pytest.mark.parametrize( ("content", "expected_leaders"), [ @@ -48,6 +82,68 @@ def test_get_leaders(self, content, expected_leaders): assert leaders == expected_leaders + @pytest.mark.parametrize( + ("content", "expected_leaders"), + [ + ( + """### Leaders + * [First Leader](mailto:first.leader@owasp.org) + - Second Leader + * [Third Leader](mailto:third.leader@owasp.org)""", + { + "First Leader": "first.leader@owasp.org", + "Third Leader": "third.leader@owasp.org", + }, + ), + ( + """- [Alice](mailto:alice@example.com) + - [Bob](mailto:bob@example.com)""", + { + "Alice": "alice@example.com", + "Bob": "bob@example.com", + }, + ), + ( + '- Leader1', + {}, + ), + ( + """## Chapter Leaders + Here are the leaders for this chapter: + + * [Eve](mailto:eve@example.com) + - Frank + Just some random text here. + 1. Not a leader list item""", + {"Eve": "eve@example.com"}, + ), + ( + "", + {}, + ), + ( + None, + {}, + ), + ( + "* [ Spaced Leader ](mailto: spaced@owasp.org )", + { + "Spaced Leader": "spaced@owasp.org", + }, + ), + ], + ) + def test_get_leaders_emails(self, content, expected_leaders): + model = EntityModel() + repository = Repository() + repository.name = "www-project-example" + model.owasp_repository = repository + + with patch("apps.owasp.models.common.get_repository_file_content", return_value=content): + leaders_emails = model.get_leaders_emails() + + assert leaders_emails == expected_leaders + @pytest.mark.parametrize( ("content", "expected_metadata"), [ @@ -92,6 +188,42 @@ def test_get_metadata(self, content, expected_metadata): assert metadata == expected_metadata + @pytest.mark.parametrize( + ("content", "domain", "expected_urls"), + [ + ( + """* [Homepage](https://owasp.org) +* [Project Repo](https://github.com/OWASP/www-project)""", + None, + ["https://owasp.org", "https://github.com/OWASP/www-project"], + ), + ( + """* [Homepage](https://owasp.org) +* [Project Repo](https://github.com/OWASP/www-project)""", + "owasp.org", + ["https://owasp.org"], + ), + ( + """* [Homepage](https://owasp.org)""", + "example.com", + [], + ), + ("This test contains no URLs.", None, []), + ("", None, []), + (None, None, []), + ], + ) + def test_get_urls(self, content, domain, expected_urls): + model = EntityModel() + repository = Repository() + repository.name = "www-project-example" + model.owasp_repository = repository + + with patch("apps.owasp.models.common.get_repository_file_content", return_value=content): + urls = model.get_urls(domain=domain) + + assert urls == expected_urls + @pytest.mark.parametrize( ("key", "expected_url"), [ @@ -150,3 +282,44 @@ def test_parse_tags(self, tags, expected_tags): tags = model.parse_tags(tags) assert tags == expected_tags + + @patch("apps.owasp.models.common.ContentType") + @patch("apps.owasp.models.common.EntityMember") + @patch("apps.owasp.models.common.BulkSaveModel") + def test_sync_leaders_empty_dict_no_save( + self, mock_bulk_save, mock_entity_member, mock_content_type + ): + """Test sync_leaders with empty dict doesn't call bulk_save.""" + mock_content_type.objects.get_for_model.return_value = self.content_type + mock_entity_member.objects.filter.return_value = [] + + self.model.sync_leaders({}) + + mock_bulk_save.bulk_save.assert_not_called() + + @patch("apps.owasp.models.common.ContentType") + @patch("apps.owasp.models.common.EntityMember") + @patch("apps.owasp.models.common.BulkSaveModel") + def test_sync_leaders_mixed_scenario( + self, mock_bulk_save, mock_entity_member, mock_content_type + ): + """Test sync_leaders with both existing and new leaders.""" + mock_content_type.objects.get_for_model.return_value = self.content_type + + existing_leader = Mock() + existing_leader.member_name = "John Doe" + existing_leader.member_email = "old@example.com" + + mock_entity_member.objects.filter.return_value = [existing_leader] + + leaders_emails = { + "John Doe": "new@example.com", # Update existing + "Jane Smith": "jane@example.com", # New leader + } + + self.model.sync_leaders(leaders_emails) + + call_args = mock_bulk_save.bulk_save.call_args + leaders_to_save = call_args[0][1] + + assert len(leaders_to_save) == 2 # Updated existing + new leader diff --git a/cspell/custom-dict.txt b/cspell/custom-dict.txt index fff4dc5fa4..9a191d1b4a 100644 --- a/cspell/custom-dict.txt +++ b/cspell/custom-dict.txt @@ -8,6 +8,7 @@ CISSP Cañón DRF GBP +GFKs GSOC GTM Héllo