-
Notifications
You must be signed in to change notification settings - Fork 197
Add time-zone aware timestamp normalization transformer with tests #680
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| import unittest | ||
|
||
| from petl.transform.normalize_timezone import normalize_timezone | ||
|
|
||
class TestNormalizeTimezone(unittest.TestCase):
    """Tests for ``normalize_timezone``: UTC conversion and error reporting."""

    def setUp(self):
        # normalize_timezone parses timestamps with datetime.fromisoformat,
        # which only exists on Python 3.7+. On older interpreters the
        # resulting AttributeError is re-raised as ValueError by the function
        # under test, so the error-path tests below would pass for the wrong
        # reason. Skip the whole case instead.
        from datetime import datetime

        if not hasattr(datetime, 'fromisoformat'):
            self.skipTest('datetime.fromisoformat requires Python 3.7+')

    def test_basic_conversion(self):
        """Naive local timestamps are converted to UTC ISO strings."""
        input_data = [
            {'timestamp': '2023-12-01T10:00:00', 'timezone': 'America/New_York'},
            {'timestamp': '2023-12-01T15:00:00', 'timezone': 'Europe/London'}
        ]
        result = list(normalize_timezone(input_data))

        self.assertEqual(result[0]['timestamp_utc'], '2023-12-01T15:00:00+00:00')
        self.assertEqual(result[1]['timestamp_utc'], '2023-12-01T15:00:00+00:00')
        self.assertEqual(result[0]['timezone_original'], 'America/New_York')

    def test_invalid_timezone(self):
        """An unknown timezone name is reported as ValueError."""
        input_data = [{'timestamp': '2023-12-01T10:00:00', 'timezone': 'Invalid/Zone'}]
        with self.assertRaises(ValueError):
            list(normalize_timezone(input_data))

    def test_missing_timestamp(self):
        """A row without the timestamp column is reported as ValueError."""
        input_data = [{'timezone': 'UTC'}]
        with self.assertRaises(ValueError):
            list(normalize_timezone(input_data))
|
|
||
| if __name__ == '__main__': | ||
|
||
| unittest.main() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| from datetime import datetime | ||
|
||
| import pytz | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Currently, […] Due to this, the CI jobs running on Windows are failing with the following error: […] Would you mind making the […]
||
|
|
||
def normalize_timezone(table, timestamp_col='timestamp', tz_col='timezone'):
    """
    Normalize timestamps to UTC while retaining the original timezone.

    Each row is read as a mapping: the value under ``timestamp_col`` is
    parsed with ``datetime.fromisoformat`` (Python 3.7+), localized to the
    zone named under ``tz_col`` via ``pytz``, and converted to UTC. Input
    rows are never modified; a shallow copy with the extra fields is yielded.

    Args:
        table: iterable of dict-like rows
        timestamp_col (str): column name with ISO 8601 timestamp strings.
            The timestamps must be naive (no UTC offset), because the
            offset is derived from ``tz_col``.
        tz_col (str): column name with timezone name (e.g., 'America/New_York')

    Yields:
        dict: a copy of each row with two added fields: 'timestamp_utc'
        (ISO 8601 string in UTC) and 'timezone_original'.

    Raises:
        ValueError: if a row cannot be normalized (missing column,
            unparsable timestamp, unknown timezone, ...). The underlying
            exception is attached as ``__cause__``.

    Example:
        >>> rows = [{'timestamp': '2023-12-01T10:00:00',
        ...          'timezone': 'America/New_York'}]
        >>> result = list(normalize_timezone(rows))
        >>> result[0]['timestamp_utc']
        '2023-12-01T15:00:00+00:00'
        >>> result[0]['timezone_original']
        'America/New_York'
    """
    for row in table:
        try:
            original_ts = row[timestamp_col]
            original_tz = row[tz_col]

            # Parse the timestamp
            naive_dt = datetime.fromisoformat(original_ts)

            # Attach original timezone
            local_dt = pytz.timezone(original_tz).localize(naive_dt)

            # Convert to UTC
            utc_dt = local_dt.astimezone(pytz.UTC)

            # Create a new row with original + new fields
            new_row = dict(row)
            new_row['timestamp_utc'] = utc_dt.isoformat()
            new_row['timezone_original'] = original_tz
        except Exception as e:
            # Keep the broad catch so callers relying on ValueError for any
            # per-row failure are unaffected, but chain the real cause.
            raise ValueError(f"Failed to normalize row {row} due to error: {e}") from e
        else:
            # Yield outside the try body: an exception thrown into the
            # generator by its consumer is not a normalization failure and
            # must not be rewritten as one.
            yield new_row
|
||
Uh oh!
There was an error while loading. Please reload this page.