-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstreamlit_app.py
More file actions
71 lines (54 loc) · 2.4 KB
/
streamlit_app.py
File metadata and controls
71 lines (54 loc) · 2.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import streamlit as st
import pandas as pd
import io
# Streamlit page script: upload a CSV/Excel file, apply optional cleaning
# steps chosen in the sidebar, preview the result and offer it for download.

# Page setup. set_page_config is the first Streamlit command issued here.
st.set_page_config(page_title="Excel Cleaner", page_icon="📊")
st.title("📊 Excel Cleaner")
st.write("Upload your Excel or CSV file to clean it automatically.")

uploaded_file = st.file_uploader("Choose a file", type=['xlsx', 'xls', 'csv'])

if uploaded_file is not None:
    try:
        # Decide the format once, case-insensitively, so that a file named
        # "DATA.CSV" is not mistaken for an Excel workbook. The same flag
        # drives both reading and writing, keeping the two in agreement.
        is_csv = uploaded_file.name.lower().endswith('.csv')

        # Read file
        if is_csv:
            df = pd.read_csv(uploaded_file)
        else:
            df = pd.read_excel(uploaded_file)

        st.write("### Original Data Preview")
        st.dataframe(df.head())
        st.write(f"**Shape:** {df.shape[0]} rows, {df.shape[1]} columns")

        # Cleaning Options
        st.sidebar.header("Cleaning Options")
        drop_duplicates = st.sidebar.checkbox("Remove Duplicates", value=True)
        fill_na = st.sidebar.checkbox("Fill Missing Values", value=False)

        if drop_duplicates:
            df = df.drop_duplicates()

        if fill_na:
            # The fill value comes from a text input, so it is always a string.
            fill_value = st.sidebar.text_input("Fill value (e.g., 0, N/A)", "N/A")
            df = df.fillna(fill_value)

        # Column selection
        st.write("### Select Columns to Keep")
        all_columns = df.columns.tolist()
        selected_columns = st.multiselect("Columns", all_columns, default=all_columns)

        if selected_columns:
            df_cleaned = df[selected_columns]

            st.write("### Cleaned Data Preview")
            st.dataframe(df_cleaned.head())

            # Download: serialize into an in-memory buffer in the same family
            # of format as the upload (CSV stays CSV, Excel becomes .xlsx).
            st.write("### Download")
            output = io.BytesIO()
            if is_csv:
                df_cleaned.to_csv(output, index=False)
                mime = "text/csv"
                ext = "csv"
            else:
                with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                    df_cleaned.to_excel(writer, index=False)
                mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                ext = "xlsx"

            # Build the download name from the upload's stem plus the extension
            # of the format actually written; a legacy ".xls" upload is written
            # as an xlsx workbook and must not keep its ".xls" suffix.
            stem = uploaded_file.name.rsplit(".", 1)[0]
            st.download_button(
                label="Download Cleaned File",
                data=output.getvalue(),
                file_name=f"cleaned_{stem}.{ext}",
                mime=mime,
            )
        else:
            # Nothing to preview or export when every column is deselected.
            st.warning("Select at least one column to preview and download the cleaned data.")

    # Top-level UI boundary: surface any read/clean/write failure to the user
    # instead of letting the script crash with a traceback.
    except Exception as e:
        st.error(f"Error processing file: {e}")