Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions backend/compare/handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import json
import sys
sys.path.append('../')
import backend.compare.helpers as helpers

from sqlalchemy import create_engine, text
from config.config import ENDPOINT, PORT, DATABASE_NAME
from credentials.credentials import MASTER_USERNAME, MASTER_PASSWORD

def _json_response(status_code, payload):
    """Build the handler's response dict with a JSON-encoded body."""
    return {"statusCode": status_code, "body": json.dumps(payload)}


def _parse_body(event):
    """Extract the request payload from the event, raising ValueError on bad input."""
    body = event.get("body") if isinstance(event, dict) else None
    if not body:
        raise ValueError("Missing 'body' in event")

    if isinstance(body, str):
        data = json.loads(body)
        # A body that decodes to another string is treated as double-encoded
        # JSON and decoded once more.
        if isinstance(data, str):
            data = json.loads(data)
    elif isinstance(body, dict):
        data = body
    else:
        raise ValueError("Unrecognized body format")

    # Guard against payloads such as lists or numbers, which have no fields.
    if not isinstance(data, dict):
        raise ValueError("Unrecognized body format")

    # NOTE: 'id' is required by the request contract but is not otherwise
    # used by this handler.
    for field in ("id", "suburb_list"):
        if field not in data:
            raise ValueError(f"Missing '{field}' in body")
    return data


def lambda_handler(event, context):
    """
    Lambda function entry point for the suburb comparison endpoint.

    Validates the request body, checks database connectivity, runs
    ``helpers.compare`` for the requested suburbs and returns the result as
    a JSON string.

    Args:
        event (dict): Invocation event. Its ``"body"`` is either a dict or a
            JSON string (optionally double-encoded) containing ``"id"`` and
            ``"suburb_list"``.
        context: Lambda context object (unused).

    Returns:
        dict: ``{"statusCode": int, "body": str}`` where ``body`` is JSON.
            * 200 - body is the metric -> suburb -> value mapping.
            * 400 - the request was invalid; body is ``{"error": message}``.
            * 500 - a server-side failure (e.g. the database is unreachable);
              body is ``{"error": message}``.
    """
    try:
        data = _parse_body(event)
    except ValueError as exc:  # json.JSONDecodeError is a ValueError subclass
        return _json_response(400, {"error": str(exc)})

    engine = None
    try:
        engine = create_engine(f'postgresql://{MASTER_USERNAME}:{MASTER_PASSWORD}@{ENDPOINT}:{PORT}/{DATABASE_NAME}')

        # Fail fast with a clear message if the database cannot be reached.
        try:
            print("Trying to connect to database...")
            with engine.connect() as connection:
                connection.execute(text("SELECT 1"))
        except Exception as e:
            print("Connection failed:", e)
            raise RuntimeError("Database connection failed") from e

        try:
            res = helpers.compare(engine, data["suburb_list"])
        except (ValueError, TypeError) as exc:
            # compare() signals bad or unknown suburb input with these types.
            return _json_response(400, {"error": str(exc)})

        # compare() returns a plain dict, so it is serialised with json.dumps.
        return _json_response(200, res)
    except Exception as exc:
        # Anything else is a server-side fault, not a client error.
        return _json_response(500, {"error": str(exc)})
    finally:
        # The engine is created per invocation; release its pooled connections.
        if engine is not None:
            engine.dispose()
143 changes: 143 additions & 0 deletions backend/compare/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from sqlalchemy import text

def retrieve_data(engine, suburb):
    """
    Retrieve real estate and demographic data for a specific suburb.

    This helper function queries the database for a given suburb and returns
    a dictionary of metrics such as postcode, state, number of properties,
    average property size, inflation index, population, and median income.

    This function is intended to be used internally by higher-level functions
    such as `compare`, and can be called repeatedly to build up a comparison dataset.

    Args:
        engine (sqlalchemy.Engine): A SQLAlchemy engine connected to the database.
        suburb (str): The name of the suburb to retrieve data for. Matching
            ignores case and surrounding whitespace.

    Returns:
        dict: A dictionary containing key metrics for the suburb. The two
            averages are ``None`` when the suburb has no joined property rows;
            ``Median Income`` is ``None`` when the stored value is NULL.

    Example:
        >>> retrieve_data(engine, 'Epping')
        {
            'Postcode': 2121,
            'State': 'NSW',
            'Number of Properties': 148,
            'Average Property Size': 134.2,
            'Average Inflation Index': 1.15,
            'Population': 23456,
            'Median Income': 84200.0
        }

    Raises:
        ValueError: If no data is found for the given suburb.
    """

    # LOWER() on the column makes the match case-insensitive regardless of
    # how suburb names are stored. ORDER BY makes the LIMIT 1 pick repeatable
    # when one suburb name maps to several postcode/state combinations.
    query = """
        SELECT
            loc.suburb,
            loc.postcode,
            loc.state,
            COUNT(prop.id) AS num_properties,
            AVG(prop.property_size) AS avg_property_size,
            AVG(prop.inflation_index) AS avg_inflation_index,
            loc.population,
            loc.median_income
        FROM
            location loc
        LEFT JOIN
            property prop ON prop.location_id = loc.id
        WHERE
            LOWER(loc.suburb) = :suburb
        GROUP BY
            loc.suburb, loc.postcode, loc.state, loc.population, loc.median_income
        ORDER BY
            loc.postcode, loc.state
        LIMIT 1
    """

    normalised = suburb.strip().lower()
    with engine.connect() as connection:
        # The suburb is passed as a bound parameter, never interpolated.
        row = connection.execute(text(query), {"suburb": normalised}).fetchone()

    if row is None:
        raise ValueError(f"No data found for suburb: {suburb}")

    def _to_float(value):
        # Convert numeric results to plain floats while preserving NULLs.
        return float(value) if value is not None else None

    return {
        'Postcode': row.postcode,
        'State': row.state,
        'Number of Properties': row.num_properties,
        'Average Property Size': _to_float(row.avg_property_size),
        'Average Inflation Index': _to_float(row.avg_inflation_index),
        'Population': row.population,
        'Median Income': _to_float(row.median_income),
    }

def compare(engine, suburb_list):
    """Compare location and property metrics across multiple suburbs.

    Calls `retrieve_data` once per suburb and pivots the results so that each
    metric maps suburb names to their values. Suburbs for which
    `retrieve_data` raises ``ValueError`` (no data found) are skipped rather
    than failing the whole comparison.

    Args:
        engine (sqlalchemy.Engine): A SQLAlchemy engine connected to the database.
        suburb_list (list of str): Suburb names to compare. Each name is used
            verbatim as the key in the result.

    Returns:
        dict: A dictionary keyed by metric ('Postcode', 'State',
            'Number of Properties', 'Average Property Size',
            'Average Inflation Index', 'Population', 'Median Income'), where
            each value is a dictionary mapping suburb name to that suburb's
            value for the metric.

    Raises:
        TypeError: If `suburb_list` is not a list.
        ValueError: If `suburb_list` is empty, contains anything other than
            non-blank strings, or none of the suburbs could be found.

    Example:
        >>> compare(engine, ['Epping', 'Haymarket'])
        {
            'Postcode': {'Epping': 2121, 'Haymarket': 2000},
            'State': {'Epping': 'NSW', 'Haymarket': 'NSW'},
            ...
        }
    """
    import logging

    if not isinstance(suburb_list, list):
        raise TypeError("suburb_list must be a list of strings")
    if not suburb_list:
        raise ValueError("suburb_list cannot be empty")
    if any(not isinstance(suburb, str) or not suburb.strip() for suburb in suburb_list):
        raise ValueError("suburb_list must contain non-empty strings only")

    res = {
        'Postcode': {},
        'State': {},
        'Number of Properties': {},
        'Average Property Size': {},
        'Average Inflation Index': {},
        'Population': {},
        'Median Income': {}
    }

    for suburb in suburb_list:
        try:
            data = retrieve_data(engine, suburb)
        except ValueError as exc:
            # Best-effort: an unknown suburb is left out of the result, but
            # the skip is logged so it is not invisible.
            logging.getLogger(__name__).warning("Skipping suburb %r: %s", suburb, exc)
            continue
        for metric, value in data.items():
            res[metric][suburb] = value

    # Every found suburb contributes a 'Postcode' entry, so an empty mapping
    # means no suburb in the list could be found.
    if not res['Postcode']:
        raise ValueError("No valid suburbs found in the input list.")

    return res
Empty file added backend/config/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions backend/config/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Connection settings for the PostgreSQL database used by the backend.
ENDPOINT = "historical-property-sales.chpodz4akwo5.us-east-1.rds.amazonaws.com"
# Held as a string; the value is a port number.
PORT = "5432"
DATABASE_NAME = "postgres"
Empty file added backend/credentials/__init__.py
Empty file.
2 changes: 2 additions & 0 deletions backend/credentials/credentials.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import os

# Database credentials are read from the environment so that secrets are not
# committed to version control. Set DB_MASTER_USERNAME / DB_MASTER_PASSWORD in
# the deployment environment (e.g. the Lambda configuration).
# NOTE(security): the password that used to be hard-coded here has been
# exposed in the repository history and should be rotated.
MASTER_USERNAME = os.environ.get('DB_MASTER_USERNAME', 'admin1')
# An empty default keeps imports working (e.g. in unit tests); a real
# connection attempt will then fail authentication instead of silently using
# a committed secret.
MASTER_PASSWORD = os.environ.get('DB_MASTER_PASSWORD', '')
158 changes: 158 additions & 0 deletions backend/tests/test_compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
import sys
sys.path.append('../')

import unittest
from unittest.mock import MagicMock, patch
from compare.helpers import compare

class TestCompareFunction(unittest.TestCase):
    """Tests for ``compare`` with ``retrieve_data`` patched, so no database is needed."""

    _METRIC_KEYS = (
        'Postcode', 'State', 'Number of Properties',
        'Average Property Size', 'Average Inflation Index',
        'Population', 'Median Income',
    )

    @staticmethod
    def _metrics(postcode, properties, size, inflation, population, income):
        """Build a record shaped like the dict the patched ``retrieve_data`` returns."""
        return {
            'Postcode': postcode,
            'State': 'NSW',
            'Number of Properties': properties,
            'Average Property Size': size,
            'Average Inflation Index': inflation,
            'Population': population,
            'Median Income': income,
        }

    def setUp(self):
        # The engine is only handed through to the patched retrieve_data.
        self.mock_engine = MagicMock()

    @patch('compare.helpers.retrieve_data')
    def test_compare_valid_suburbs(self, mock_retrieve_data):
        """Each requested suburb shows up under every metric with its own values."""
        suburbs = ['Epping', 'Surry Hills', 'Haymarket']
        mock_retrieve_data.side_effect = [
            self._metrics(postcode=2121, properties=148, size=134.2,
                          inflation=1.15, population=23456, income=84200.0),
            self._metrics(postcode=2010, properties=100, size=120.0,
                          inflation=1.10, population=15000, income=78000.0),
            self._metrics(postcode=2000, properties=200, size=110.5,
                          inflation=1.20, population=30000, income=90000.0),
        ]

        result = compare(self.mock_engine, suburbs)

        self.assertEqual(mock_retrieve_data.call_count, len(suburbs))

        wanted = set(suburbs)
        for key in self._METRIC_KEYS:
            self.assertIn(key, result)
            self.assertEqual(set(result[key].keys()), wanted)

        self.assertEqual(result['Postcode']['Epping'], 2121)
        self.assertEqual(result['State']['Haymarket'], 'NSW')
        self.assertAlmostEqual(result['Median Income']['Surry Hills'], 78000.0)

    @patch('compare.helpers.retrieve_data')
    def test_compare_empty_suburb_list(self, mock_retrieve_data):
        """An empty list is rejected."""
        with self.assertRaises(ValueError):
            compare(self.mock_engine, [])

    @patch('compare.helpers.retrieve_data')
    def test_compare_suburb_not_found(self, mock_retrieve_data):
        """A suburb with no data is left out while the others are kept."""
        def fake_retrieve(engine, suburb):
            if suburb.lower() != 'unknown':
                return self._metrics(postcode=1234, properties=50, size=100.0,
                                     inflation=1.0, population=10000, income=70000.0)
            raise ValueError(f"No data found for suburb: {suburb}")

        mock_retrieve_data.side_effect = fake_retrieve

        result = compare(self.mock_engine, ['Epping', 'Unknown'])

        postcodes = result['Postcode']
        self.assertIn('Epping', postcodes)
        self.assertNotIn('Unknown', postcodes)

    @patch('compare.helpers.retrieve_data')
    def test_compare_with_none_values(self, mock_retrieve_data):
        """None metrics are passed through unchanged."""
        mock_retrieve_data.return_value = self._metrics(
            postcode=None, properties=50, size=None,
            inflation=1.0, population=None, income=70000.0,
        )

        name = 'NullSuburb'
        result = compare(self.mock_engine, [name])

        for metric in ('Postcode', 'Average Property Size', 'Population'):
            self.assertIsNone(result[metric][name])
        self.assertEqual(result['State'][name], 'NSW')

    @patch('compare.helpers.retrieve_data')
    def test_compare_case_insensitivity(self, mock_retrieve_data):
        """Result keys keep the exact casing of the input suburb names."""
        mock_retrieve_data.return_value = self._metrics(
            postcode=1234, properties=100, size=120.0,
            inflation=1.05, population=20000, income=80000.0,
        )
        suburbs = ['ePping', 'SuRry HIlls']

        result = compare(self.mock_engine, suburbs)

        for name in suburbs:
            # The keys in the result dict match the input strings exactly.
            self.assertIn(name, result['Postcode'])
            self.assertEqual(result['State'][name], 'NSW')

    @patch('compare.helpers.retrieve_data')
    def test_compare_all_suburbs_not_found(self, mock_retrieve_data):
        """If no suburb can be found, compare raises."""
        mock_retrieve_data.side_effect = ValueError("No data found")

        with self.assertRaises(ValueError):
            compare(self.mock_engine, ['Fake1', 'Fake2'])

    @patch('compare.helpers.retrieve_data')
    def test_compare_large_suburb_list(self, mock_retrieve_data):
        """Fifty suburbs all end up in the result."""
        def dummy_record(engine, suburb):
            # Dummy data for many suburbs.
            return self._metrics(postcode=1000 + len(suburb), properties=100, size=120.0,
                                 inflation=1.0, population=10000, income=70000.0)

        mock_retrieve_data.side_effect = dummy_record

        suburbs = [f'Suburb{i}' for i in range(50)]
        result = compare(self.mock_engine, suburbs)

        self.assertEqual(len(result['Postcode']), 50)
        self.assertEqual(set(result['Postcode'].keys()), set(suburbs))


# Run the suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ scikit-learn
matplotlib
statsmodels
requests
pytest
pytest
sqlalchemy