diff --git a/backend/compare/handler.py b/backend/compare/handler.py
new file mode 100644
index 0000000..4e378e3
--- /dev/null
+++ b/backend/compare/handler.py
@@ -0,0 +1,83 @@
+import json
+import sys
+sys.path.append('../')
+import backend.compare.helpers as helpers
+
+from sqlalchemy import create_engine, text
+from config.config import ENDPOINT, PORT, DATABASE_NAME
+from credentials.credentials import MASTER_USERNAME, MASTER_PASSWORD
+
+
+def _error_response(status_code, error):
+    """Build an error response whose body is a JSON object with an "error" key."""
+    return {
+        "statusCode": status_code,
+        "body": json.dumps({"error": str(error)})
+    }
+
+
+def lambda_handler(event, context):
+    """
+    Lambda function entry point for the suburb comparison endpoint.
+
+    @event: Invocation payload. Its "body" is either a dict or a JSON string
+        (possibly JSON-encoded twice) containing the keys "id" and "suburb_list".
+    @context: Lambda runtime context (unused).
+    @return: dict with "statusCode" and "body". 200 carries the JSON-encoded
+        result of helpers.compare, 400 reports an invalid request and 500
+        reports a server-side failure such as an unreachable database.
+    """
+
+    try:
+        body = event.get("body")
+        if not body:
+            raise ValueError("Missing 'body' in event")
+
+        if isinstance(body, str):
+            data = json.loads(body)
+            # Handle a payload that was JSON-encoded twice.
+            if isinstance(data, str):
+                data = json.loads(data)
+        elif isinstance(body, dict):
+            data = body
+        else:
+            raise ValueError("Unrecognized body format")
+
+        # A JSON string may decode to a list or scalar; only an object is usable.
+        if not isinstance(data, dict):
+            raise ValueError("Unrecognized body format")
+
+        if "id" not in data:
+            raise ValueError("Missing 'id' in body")
+        if "suburb_list" not in data:
+            raise ValueError("Missing 'suburb_list' in body")
+
+        suburb_list = data.get("suburb_list")
+
+        engine = create_engine(f'postgresql://{MASTER_USERNAME}:{MASTER_PASSWORD}@{ENDPOINT}:{PORT}/{DATABASE_NAME}')
+
+        try:
+            try:
+                print("Trying to connect to database...")
+                with engine.connect() as connection:
+                    connection.execute(text("SELECT 1"))
+            except Exception as e:
+                print("Connection failed:", e)
+                raise RuntimeError("Database connection failed") from e
+
+            # compare() returns a plain dict (no .to_json), so use json.dumps.
+            res = json.dumps(helpers.compare(engine, suburb_list))
+        finally:
+            # The engine is created per invocation; release its pooled connections.
+            engine.dispose()
+
+        return {
+            "statusCode": 200,
+            "body": res
+        }
+    except (ValueError, TypeError) as e:
+        # Malformed or invalid request data.
+        return _error_response(400, e)
+    except Exception as e:
+        # Anything else, e.g. the database being unreachable.
+        return _error_response(500, e)
\ No newline at end of file
diff --git a/backend/compare/helpers.py b/backend/compare/helpers.py
new file mode 100644
index 0000000..e6d132e
--- /dev/null
+++ b/backend/compare/helpers.py
@@ -0,0 +1,140 @@
+from sqlalchemy import text
+
+
+def retrieve_data(engine, suburb):
+    """
+    Retrieve real estate and demographic data for a specific suburb.
+
+    This helper function queries the database for a given suburb and returns
+    a dictionary of metrics such as postcode, state, number of properties,
+    average property size, inflation index, population, and median income.
+
+    This function is intended to be used internally by higher-level functions
+    such as `compare`, and can be called repeatedly to build up a comparison dataset.
+
+    Args:
+        engine (sqlalchemy.Engine): A SQLAlchemy engine connected to the database.
+        suburb (str): The name of the suburb to retrieve data for (case-insensitive).
+
+    Returns:
+        dict: A dictionary containing key metrics for the suburb
+
+    Example:
+        >>> retrieve_data(engine, 'Epping')
+        {
+            'Postcode': 2121,
+            'State': 'NSW',
+            'Number of Properties': 148,
+            'Average Property Size': 134.2,
+            'Average Inflation Index': 1.15,
+            'Population': 23456,
+            'Median Income': 84200.0
+        }
+
+    Raises:
+        ValueError: If no data is found for the given suburb.
+    """
+
+    # The column is lower-cased in SQL and the parameter in Python, so the match
+    # is case-insensitive regardless of how suburb names are stored.
+    query = """
+    SELECT
+        loc.suburb,
+        loc.postcode,
+        loc.state,
+        COUNT(prop.id) AS num_properties,
+        AVG(prop.property_size) AS avg_property_size,
+        AVG(prop.inflation_index) AS avg_inflation_index,
+        loc.population,
+        loc.median_income
+    FROM
+        location loc
+    LEFT JOIN
+        property prop ON prop.location_id = loc.id
+    WHERE
+        LOWER(loc.suburb) = :suburb
+    GROUP BY
+        loc.suburb, loc.postcode, loc.state, loc.population, loc.median_income
+    LIMIT 1
+    """
+
+    with engine.connect() as connection:
+        result = connection.execute(text(query), {"suburb": suburb.lower()})
+        row = result.fetchone()
+
+    if not row:
+        raise ValueError(f"No data found for suburb: {suburb}")
+
+    return {
+        'Postcode': row.postcode,
+        'State': row.state,
+        'Number of Properties': row.num_properties,
+        'Average Property Size': float(row.avg_property_size) if row.avg_property_size is not None else None,
+        'Average Inflation Index': float(row.avg_inflation_index) if row.avg_inflation_index is not None else None,
+        'Population': row.population,
+        'Median Income': float(row.median_income) if row.median_income is not None else None
+    }
+
+
+def compare(engine, suburb_list):
+    """Compare key property and demographic metrics across multiple suburbs.
+
+    Each suburb is looked up with `retrieve_data`. The result is a side-by-side
+    comparison in which every metric maps suburb names (exactly as given in
+    `suburb_list`) to that suburb's value. Suburbs for which no data is found
+    are left out of the result.
+
+    Args:
+        engine (sqlalchemy.Engine): A SQLAlchemy engine connected to the database.
+        suburb_list (list of str): A list of non-empty suburb names.
+
+    Returns:
+        dict: A dictionary keyed by metric ('Postcode', 'State',
+            'Number of Properties', 'Average Property Size',
+            'Average Inflation Index', 'Population', 'Median Income') whose
+            values are dictionaries mapping each suburb to its value.
+
+    Raises:
+        TypeError: If `suburb_list` is not a list.
+        ValueError: If `suburb_list` is empty, contains anything other than
+            non-empty strings, or none of its suburbs could be found.
+
+    Example:
+        >>> compare(engine, ['Epping', 'Haymarket'])
+        {
+            'Postcode': {'Epping': 2121, 'Haymarket': 2000},
+            'State': {'Epping': 'NSW', 'Haymarket': 'NSW'},
+            ...
+        }
+    """
+
+    if not isinstance(suburb_list, list):
+        raise TypeError("suburb_list must be a list of strings")
+    if not suburb_list:
+        raise ValueError("suburb_list cannot be empty")
+    if any(not isinstance(suburb, str) or not suburb.strip() for suburb in suburb_list):
+        raise ValueError("suburb_list must contain non-empty strings only")
+
+    res = {
+        'Postcode': {},
+        'State': {},
+        'Number of Properties': {},
+        'Average Property Size': {},
+        'Average Inflation Index': {},
+        'Population': {},
+        'Median Income': {}
+    }
+
+    for suburb in suburb_list:
+        try:
+            data = retrieve_data(engine, suburb)
+        except ValueError:
+            # Best effort: skip suburbs without data instead of failing the whole comparison.
+            continue
+        for metric, value in data.items():
+            res[metric][suburb] = value
+
+    if not res['Postcode']:
+        raise ValueError("No valid suburbs found in the input list.")
+
+    return res
\ No newline at end of file
diff --git a/backend/config/__init__.py b/backend/config/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/config/config.py b/backend/config/config.py
new file mode 100644
index 0000000..597995e
--- /dev/null
+++ b/backend/config/config.py
@@ -0,0 +1,3 @@
+ENDPOINT = 'historical-property-sales.chpodz4akwo5.us-east-1.rds.amazonaws.com'
+PORT = '5432'
+DATABASE_NAME = 'postgres'
\ No newline at end of file
diff --git a/backend/credentials/__init__.py b/backend/credentials/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/credentials/credentials.py b/backend/credentials/credentials.py
new file mode 100644
index 0000000..36b1776
--- /dev/null
+++ b/backend/credentials/credentials.py
@@ -0,0 +1,6 @@
+import os
+
+# Credentials come from the environment so that no secret is committed to
+# version control; MASTER_PASSWORD must be set before this module is imported.
+MASTER_USERNAME = os.environ.get('MASTER_USERNAME', 'admin1')
+MASTER_PASSWORD = os.environ['MASTER_PASSWORD']
\ No newline at end of file
diff --git a/backend/tests/test_compare.py b/backend/tests/test_compare.py
new file mode 100644
index 0000000..eb6d2c5
--- /dev/null
+++ b/backend/tests/test_compare.py
@@ -0,0 +1,158 @@
+import sys
+sys.path.append('../')
+
+import unittest
+from unittest.mock import MagicMock, patch
+from compare.helpers import compare
+
+class TestCompareFunction(unittest.TestCase):
+
+    def setUp(self):
+        self.mock_engine = MagicMock()
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_valid_suburbs(self, mock_retrieve_data):
+        mock_retrieve_data.side_effect = [
+            {
+                'Postcode': 2121,
+                'State': 'NSW',
+                'Number of Properties': 148,
+                'Average Property Size': 134.2,
+                'Average Inflation Index': 1.15,
+                'Population': 23456,
+                'Median Income': 84200.0
+            },
+            {
+                'Postcode': 2010,
+                'State': 'NSW',
+                'Number of Properties': 100,
+                'Average Property Size': 120.0,
+                'Average Inflation Index': 1.10,
+                'Population': 15000,
+                'Median Income': 78000.0
+            },
+            {
+                'Postcode': 2000,
+                'State': 'NSW',
+                'Number of Properties': 200,
+                'Average Property Size': 110.5,
+                'Average Inflation Index': 1.20,
+                'Population': 30000,
+                'Median Income': 90000.0
+            }
+        ]
+
+        suburbs = ['Epping', 'Surry Hills', 'Haymarket']
+        result = compare(self.mock_engine, suburbs)
+
+        self.assertEqual(mock_retrieve_data.call_count, len(suburbs))
+
+        expected_keys = [
+            'Postcode', 'State', 'Number of Properties',
+            'Average Property Size', 'Average Inflation Index',
+            'Population', 'Median Income'
+        ]
+        for key in expected_keys:
+            self.assertIn(key, result)
+            self.assertEqual(set(result[key].keys()), set(suburbs))
+
+        self.assertEqual(result['Postcode']['Epping'], 2121)
+        self.assertEqual(result['State']['Haymarket'], 'NSW')
+        self.assertAlmostEqual(result['Median Income']['Surry Hills'], 78000.0)
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_empty_suburb_list(self, mock_retrieve_data):
+        with self.assertRaises(ValueError):
+            compare(self.mock_engine, [])
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_suburb_not_found(self, mock_retrieve_data):
+        def side_effect(engine, suburb):
+            if suburb.lower() == 'unknown':
+                raise ValueError(f"No data found for suburb: {suburb}")
+            return {
+                'Postcode': 1234,
+                'State': 'NSW',
+                'Number of Properties': 50,
+                'Average Property Size': 100.0,
+                'Average Inflation Index': 1.0,
+                'Population': 10000,
+                'Median Income': 70000.0
+            }
+        mock_retrieve_data.side_effect = side_effect
+
+        suburbs = ['Epping', 'Unknown']
+        result = compare(self.mock_engine, suburbs)
+
+        self.assertIn('Epping', result['Postcode'])
+        self.assertNotIn('Unknown', result['Postcode'])
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_with_none_values(self, mock_retrieve_data):
+        mock_retrieve_data.return_value = {
+            'Postcode': None,
+            'State': 'NSW',
+            'Number of Properties': 50,
+            'Average Property Size': None,
+            'Average Inflation Index': 1.0,
+            'Population': None,
+            'Median Income': 70000.0
+        }
+
+        suburbs = ['NullSuburb']
+        result = compare(self.mock_engine, suburbs)
+
+        self.assertIsNone(result['Postcode']['NullSuburb'])
+        self.assertIsNone(result['Average Property Size']['NullSuburb'])
+        self.assertIsNone(result['Population']['NullSuburb'])
+        self.assertEqual(result['State']['NullSuburb'], 'NSW')
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_case_insensitivity(self, mock_retrieve_data):
+        mock_retrieve_data.return_value = {
+            'Postcode': 1234,
+            'State': 'NSW',
+            'Number of Properties': 100,
+            'Average Property Size': 120.0,
+            'Average Inflation Index': 1.05,
+            'Population': 20000,
+            'Median Income': 80000.0
+        }
+        suburbs = ['ePping', 'SuRry HIlls']
+        result = compare(self.mock_engine, suburbs)
+
+        for suburb in suburbs:
+            # The keys in the result dict will match input suburb strings exactly
+            self.assertIn(suburb, result['Postcode'])
+            self.assertEqual(result['State'][suburb], 'NSW')
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_all_suburbs_not_found(self, mock_retrieve_data):
+        mock_retrieve_data.side_effect = ValueError("No data found")
+
+        suburbs = ['Fake1', 'Fake2']
+        with self.assertRaises(ValueError):
+            compare(self.mock_engine, suburbs)
+
+    @patch('compare.helpers.retrieve_data')
+    def test_compare_large_suburb_list(self, mock_retrieve_data):
+        # Return dummy data for many suburbs
+        mock_retrieve_data.side_effect = lambda engine, suburb: {
+            'Postcode': 1000 + len(suburb),
+            'State': 'NSW',
+            'Number of Properties': 100,
+            'Average Property Size': 120.0,
+            'Average Inflation Index': 1.0,
+            'Population': 10000,
+            'Median Income': 70000.0
+        }
+
+        suburbs = [f'Suburb{i}' for i in range(50)]
+        result = compare(self.mock_engine, suburbs)
+
+        self.assertEqual(len(result['Postcode']), 50)
+        self.assertEqual(set(result['Postcode'].keys()), set(suburbs))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/requirements.txt b/requirements.txt
index 4a7cb70..bb2399b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,5 @@ scikit-learn
 matplotlib
 statsmodels
 requests
-pytest
\ No newline at end of file
+pytest
+sqlalchemy
\ No newline at end of file