kj4c · kj4c · May 18, 2025 · May 17, 2025 · May 17, 2025
diff --git a/backend/compare/handler.py b/backend/compare/handler.py
@@ -0,0 +1,62 @@
+import json 
+import sys
+sys.path.append('../')
+import backend.compare.helpers as helpers
+
+from sqlalchemy import create_engine, text
+from config.config import ENDPOINT, PORT, DATABASE_NAME
+from credentials.credentials import MASTER_USERNAME, MASTER_PASSWORD
+
+def lambda_handler(event, context):
+    """
+    Lambda function entry point for the suburb comparison endpoint.
+
+    @event: dict with a "body" entry that is either a dict or a JSON string
+            (a string that was JSON-encoded twice is also accepted). The
+            decoded body must be an object containing "id" and "suburb_list".
+    @context: Lambda context object; not used by this handler.
+    @return: dict with "statusCode" and a JSON string "body".
+             200 - body is the JSON-encoded result of helpers.compare.
+             400 - the request was invalid (ValueError / TypeError); body is
+                   {"error": <message>}.
+             500 - anything else went wrong (e.g. the database is
+                   unreachable); body is {"error": "Internal server error"}.
+    """
+
+    try:
+        body = event.get("body")
+        if not body:
+            raise ValueError("Missing 'body' in event")
+
+        if isinstance(body, str):
+            data = json.loads(body)
+            # Handle a payload that was JSON-encoded twice.
+            if isinstance(data, str):
+                data = json.loads(data)
+        elif isinstance(body, dict):
+            data = body
+        else:
+            raise ValueError("Unrecognized body format")
+
+        # A JSON array/number/null would break the key lookups below.
+        if not isinstance(data, dict):
+            raise ValueError("Unrecognized body format")
+
+        # "id" is required by the request contract even though it is not
+        # used for the comparison itself.
+        if "id" not in data:
+            raise ValueError("Missing 'id' in body")
+        if "suburb_list" not in data:
+            raise ValueError("Missing 'suburb_list' in body")
+
+        suburb_list = data["suburb_list"]
+
+        engine = create_engine(f'postgresql://{MASTER_USERNAME}:{MASTER_PASSWORD}@{ENDPOINT}:{PORT}/{DATABASE_NAME}')
+
+        try:
+            try:
+                print("Trying to connect to database...")
+                with engine.connect() as connection:
+                    connection.execute(text("SELECT 1"))
+            except Exception as e:
+                print("Connection failed:", e)
+                raise RuntimeError("Database connection failed") from e
+
+            res = helpers.compare(engine, suburb_list)
+        finally:
+            # The engine is created per invocation, so release its pooled
+            # connections instead of leaving them open.
+            engine.dispose()
+
+        # helpers.compare returns a plain dict (not a DataFrame), so it is
+        # serialised with json.dumps. A serialisation failure is a server
+        # fault and must not be reported as a client error.
+        try:
+            payload = json.dumps(res)
+        except TypeError as e:
+            raise RuntimeError("Result is not JSON serialisable") from e
+
+        return {
+            "statusCode": 200,
+            "body": payload
+        }
+    except (ValueError, TypeError) as e:
+        # Invalid request: malformed JSON, missing keys, bad suburb_list.
+        return {
+            "statusCode": 400,
+            "body": json.dumps({"error": str(e)})
+        }
+    except Exception as e:
+        # Server-side failure: log the detail, return a generic message.
+        print("Unhandled error:", repr(e))
+        return {
+            "statusCode": 500,
+            "body": json.dumps({"error": "Internal server error"})
+        }
diff --git a/backend/compare/helpers.py b/backend/compare/helpers.py
@@ -0,0 +1,143 @@
+from sqlalchemy import text
+
+def retrieve_data(engine, suburb):
+    """
+    Retrieve real estate and demographic data for a specific suburb.
+
+    Runs one aggregate query joining ``location`` with ``property`` and
+    returns the suburb's postcode, state, number of properties, average
+    property size, average inflation index, population and median income.
+
+    The suburb name is matched case-insensitively (both sides of the
+    comparison are lower-cased) and surrounding whitespace is ignored. If
+    several locations share the name, the one with the lowest postcode
+    (then state) is returned so the result is deterministic.
+
+    Args:
+        engine (sqlalchemy.Engine): A SQLAlchemy engine connected to the database.
+        suburb (str): The name of the suburb to retrieve data for.
+
+    Returns:
+        dict: A dictionary containing key metrics for the suburb. Averages
+        and median income are converted to ``float``; they are ``None`` when
+        the database value is NULL (e.g. a suburb with no properties).
+
+        Example:
+        >>> retrieve_data(engine, 'Epping')
+        {
+            'Postcode': 2121,
+            'State': 'NSW',
+            'Number of Properties': 148,
+            'Average Property Size': 134.2,
+            'Average Inflation Index': 1.15,
+            'Population': 23456,
+            'Median Income': 84200.0
+        }
+
+    Raises:
+        ValueError: If no data is found for the given suburb.
+    """
+
+    query = """
+        SELECT 
+            loc.suburb, 
+            loc.postcode,
+            loc.state,
+            COUNT(prop.id) AS num_properties,
+            AVG(prop.property_size) AS avg_property_size,
+            AVG(prop.inflation_index) AS avg_inflation_index,
+            loc.population,
+            loc.median_income
+        FROM 
+            location loc
+        LEFT JOIN
+            property prop ON prop.location_id = loc.id
+        WHERE
+            LOWER(loc.suburb) = :suburb
+        GROUP BY
+            loc.suburb, loc.postcode, loc.state, loc.population, loc.median_income
+        ORDER BY
+            loc.postcode, loc.state
+        LIMIT 1
+    """
+
+    def _as_float(value):
+        # Keep NULLs as None; convert other numeric results to float.
+        return float(value) if value is not None else None
+
+    with engine.connect() as connection:
+        result = connection.execute(text(query), {"suburb": suburb.strip().lower()})
+        row = result.fetchone()
+
+    if row is None:
+        raise ValueError(f"No data found for suburb: {suburb}")
+
+    return {
+        'Postcode': row.postcode,
+        'State': row.state,
+        'Number of Properties': row.num_properties,
+        'Average Property Size': _as_float(row.avg_property_size),
+        'Average Inflation Index': _as_float(row.avg_inflation_index),
+        'Population': row.population,
+        'Median Income': _as_float(row.median_income)
+    }
+
+def compare(engine, suburb_list):
+    """Compare real estate and demographic metrics across multiple suburbs.
+
+    Calls ``retrieve_data`` once per distinct suburb name and pivots the
+    results so that each metric maps suburb names to values. Suburbs for
+    which ``retrieve_data`` raises ``ValueError`` (no data found) are skipped
+    with a logged warning rather than failing the whole comparison.
+
+    Args:
+        engine (sqlalchemy.Engine): A SQLAlchemy engine connected to the database.
+        suburb_list (list of str): Suburb names to compare. The names are
+            used verbatim as keys in the result.
+
+    Returns:
+        dict: Maps each metric ('Postcode', 'State', 'Number of Properties',
+            'Average Property Size', 'Average Inflation Index', 'Population',
+            'Median Income') to a dict of ``{suburb: value}`` covering the
+            suburbs that were found.
+
+    Raises:
+        TypeError: If ``suburb_list`` is not a list.
+        ValueError: If the list is empty, contains a non-string or blank
+            entry, or none of the suburbs could be found.
+
+    Example:
+        >>> compare(engine, ['Epping', 'Haymarket'])
+        {
+            'Postcode': {'Epping': 2121, 'Haymarket': 2000},
+            'State': {'Epping': 'NSW', 'Haymarket': 'NSW'},
+            'Number of Properties': {'Epping': 148, 'Haymarket': 200},
+            ...
+        }
+    """
+    import logging
+
+    if not isinstance(suburb_list, list):
+        raise TypeError("suburb_list must be a list of strings")
+    if not suburb_list:
+        raise ValueError("suburb_list cannot be empty")
+    if any(not isinstance(suburb, str) or not suburb.strip() for suburb in suburb_list):
+        raise ValueError("suburb_list must contain non-empty strings only")
+
+    logger = logging.getLogger(__name__)
+
+    res = {
+        'Postcode': {},
+        'State': {},
+        'Number of Properties': {},
+        'Average Property Size': {},
+        'Average Inflation Index': {},
+        'Population': {},
+        'Median Income': {}
+    }
+
+    # dict.fromkeys drops exact duplicates while keeping the input order, so
+    # a repeated name is not queried twice.
+    for suburb in dict.fromkeys(suburb_list):
+        try:
+            data = retrieve_data(engine, suburb)
+        except ValueError as e:
+            # Best effort: an unknown suburb must not abort the comparison.
+            logger.warning("Skipping suburb %r: %s", suburb, e)
+            continue
+
+        for metric, value in data.items():
+            res.setdefault(metric, {})[suburb] = value
+
+    if not res['Postcode']:
+        raise ValueError("No valid suburbs found in the input list.")
+
+    return res
diff --git a/backend/config/__init__.py b/backend/config/__init__.py
diff --git a/backend/config/config.py b/backend/config/config.py
@@ -0,0 +1,3 @@
+# Connection settings for the PostgreSQL database hosted on AWS RDS.
+ENDPOINT = "historical-property-sales.chpodz4akwo5.us-east-1.rds.amazonaws.com"
+# Kept as a string: it is interpolated into the connection URL.
+PORT = "5432"
+DATABASE_NAME = "postgres"
diff --git a/backend/credentials/__init__.py b/backend/credentials/__init__.py
diff --git a/backend/credentials/credentials.py b/backend/credentials/credentials.py
@@ -0,0 +1,2 @@
+import os
+
+# Database credentials are read from the environment so that no secret is
+# stored in version control. The password that was previously committed in
+# this file should be treated as compromised and rotated.
+# MASTER_USERNAME falls back to the previous default; MASTER_PASSWORD has no
+# fallback and is None when the environment variable is not set.
+MASTER_USERNAME = os.environ.get('MASTER_USERNAME', 'admin1')
+MASTER_PASSWORD = os.environ.get('MASTER_PASSWORD')
diff --git a/backend/tests/test_compare.py b/backend/tests/test_compare.py
@@ -0,0 +1,158 @@
+import sys
+sys.path.append('../')
+
+import unittest
+from unittest.mock import MagicMock, patch
+from compare.helpers import compare
+
+class TestCompareFunction(unittest.TestCase):
+    """Exercise compare() with retrieve_data patched out and a mock engine."""
+
+    # Metric names in the order retrieve_data reports them.
+    METRIC_NAMES = (
+        'Postcode', 'State', 'Number of Properties',
+        'Average Property Size', 'Average Inflation Index',
+        'Population', 'Median Income',
+    )
+
+    @classmethod
+    def _record(cls, *values):
+        """Pair METRIC_NAMES with ``values`` to build one suburb record."""
+        return dict(zip(cls.METRIC_NAMES, values))
+
+    def setUp(self):
+        self.mock_engine = MagicMock()
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_valid_suburbs(self, mock_retrieve_data):
+        """Every requested suburb shows up under every metric."""
+        suburbs = ['Epping', 'Surry Hills', 'Haymarket']
+        mock_retrieve_data.side_effect = [
+            self._record(2121, 'NSW', 148, 134.2, 1.15, 23456, 84200.0),
+            self._record(2010, 'NSW', 100, 120.0, 1.10, 15000, 78000.0),
+            self._record(2000, 'NSW', 200, 110.5, 1.20, 30000, 90000.0),
+        ]
+
+        result = compare(self.mock_engine, suburbs)
+
+        self.assertEqual(mock_retrieve_data.call_count, len(suburbs))
+
+        wanted = set(suburbs)
+        for name in self.METRIC_NAMES:
+            self.assertIn(name, result)
+            self.assertEqual(set(result[name]), wanted)
+
+        self.assertEqual(result['Postcode']['Epping'], 2121)
+        self.assertEqual(result['State']['Haymarket'], 'NSW')
+        self.assertAlmostEqual(result['Median Income']['Surry Hills'], 78000.0)
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_empty_suburb_list(self, mock_retrieve_data):
+        """An empty list is rejected with ValueError."""
+        with self.assertRaises(ValueError):
+            compare(self.mock_engine, [])
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_suburb_not_found(self, mock_retrieve_data):
+        """A suburb without data is left out; the others are still returned."""
+        def fake_lookup(engine, suburb):
+            if suburb.lower() == 'unknown':
+                raise ValueError(f"No data found for suburb: {suburb}")
+            return self._record(1234, 'NSW', 50, 100.0, 1.0, 10000, 70000.0)
+
+        mock_retrieve_data.side_effect = fake_lookup
+
+        result = compare(self.mock_engine, ['Epping', 'Unknown'])
+
+        self.assertIn('Epping', result['Postcode'])
+        self.assertNotIn('Unknown', result['Postcode'])
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_with_none_values(self, mock_retrieve_data):
+        """None metric values are passed through unchanged."""
+        mock_retrieve_data.return_value = self._record(
+            None, 'NSW', 50, None, 1.0, None, 70000.0
+        )
+
+        result = compare(self.mock_engine, ['NullSuburb'])
+
+        for metric in ('Postcode', 'Average Property Size', 'Population'):
+            self.assertIsNone(result[metric]['NullSuburb'])
+        self.assertEqual(result['State']['NullSuburb'], 'NSW')
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_case_insensitivity(self, mock_retrieve_data):
+        """Result keys keep the exact casing of the input suburb names."""
+        mock_retrieve_data.return_value = self._record(
+            1234, 'NSW', 100, 120.0, 1.05, 20000, 80000.0
+        )
+        suburbs = ['ePping', 'SuRry HIlls']
+
+        result = compare(self.mock_engine, suburbs)
+
+        for name in suburbs:
+            self.assertIn(name, result['Postcode'])
+            self.assertEqual(result['State'][name], 'NSW')
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_all_suburbs_not_found(self, mock_retrieve_data):
+        """If no suburb can be found at all, compare raises ValueError."""
+        mock_retrieve_data.side_effect = ValueError("No data found")
+
+        with self.assertRaises(ValueError):
+            compare(self.mock_engine, ['Fake1', 'Fake2'])
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_large_suburb_list(self, mock_retrieve_data):
+        """Fifty distinct suburbs all end up in the result."""
+        mock_retrieve_data.side_effect = lambda engine, suburb: self._record(
+            1000 + len(suburb), 'NSW', 100, 120.0, 1.0, 10000, 70000.0
+        )
+        suburbs = [f'Suburb{i}' for i in range(50)]
+
+        result = compare(self.mock_engine, suburbs)
+
+        self.assertEqual(len(result['Postcode']), 50)
+        self.assertEqual(set(result['Postcode'].keys()), set(suburbs))
+
+# Run the test suite when this module is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()
diff --git a/requirements.txt b/requirements.txt
@@ -11,4 +11,5 @@ scikit-learn
 matplotlib
 statsmodels
 requests
-pytest
+pytest
+sqlalchemy
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		MASTER_USERNAME = 'admin1'
		MASTER_PASSWORD = 'korzuf-Fyhxy7-vihqut'