Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 17 additions & 13 deletions website/meta/universe.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{
"id": "TeNs",
"title": "Temporal Expressions Normalization spaCy",
"thumb": "https://github-production-user-asset-6210df.s3.amazonaws.com/40547052/433595900-fae3c9d9-7181-4d8b-8b49-e6dc4fca930b.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20250414%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250414T235545Z&X-Amz-Expires=300&X-Amz-Signature=e21d3c06300ceb15fa1dadd7cb60081cc9f1b35e5a7bfd07f6e8b90dd7fad9d0&X-Amz-SignedHeaders=host",
"thumb": "https://raw.githubusercontent.com/iliedorobat/timespan-normalization-spacy/refs/heads/main/icon.png",
"url": "https://pypi.org/project/temporal-normalization-spacy/",
"slogan": "A temporal expression normalization plugin for Romanian using rule-based methods and DBpedia mappings.",
"description": "**[Temporal Expressions Normalization spaCy (TeNs)](https://github.com/iliedorobat/timespan-normalization-spacy)** is a powerful pipeline component for spaCy that seamlessly identifies and parses date entities in text. It leverages the **[Temporal Expressions Normalization Framework](https://github.com/iliedorobat/timespan-normalization)** to recognize a wide variety of date formats using an extensive set of regular expressions (RegEx), ensuring robust and adaptable date extraction across diverse textual sources.\n\nUnlike conventional solutions that primarily focus on well-structured date formats, TeNs excels in handling real-world text by **identifying** not only standard date representations but also **abbreviated, informal, or even misspelled temporal expressions.** This makes it particularly effective for processing noisy or unstructured data, such as historical records, user-generated content, and scanned documents with OCR inaccuracies.",
Expand Down Expand Up @@ -34,21 +34,25 @@
"",
"# Display information about the identified and normalized dates in the text.",
"for entity in doc.ents:",
" edges = entity._.time_series.edges",
" time_series = entity._.time_series",
"",
" print('Start Edge:')",
" print(edges.start.serialize('\\t'))",
" print()",
" if isinstance(time_series, list):",
" for ts in time_series:",
" edges = ts.edges",
"",
" print('End Edge:')",
" print(edges.end.serialize('\\t'))",
" print()",
" print('Start Edge:')",
" print(edges.start.serialize('\\t'))",
" print()",
"",
" print('Periods:')",
" for period in entity._.time_series.periods:",
" print(period.serialize('\\t'))",
" print()",
" print('---------------------')"
" print('End Edge:')",
" print(edges.end.serialize('\\t'))",
" print()",
"",
" print('Periods:')",
" for period in ts.periods:",
" print(period.serialize('\\t'))",
" print()",
" print('---------------------')"
],
"code_language": "python",
"author": "Ilie Cristian Dorobat",
Expand Down
Loading