diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..d56657a
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..2cdf9da
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/pandas_task.iml b/.idea/pandas_task.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/.idea/pandas_task.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/homework.py b/homework.py
new file mode 100644
index 0000000..fca7e0e
--- /dev/null
+++ b/homework.py
@@ -0,0 +1,33 @@
+import pandas
+
+works = pandas.read_csv("works.csv").dropna()
+
+
+def count(field1, field2, jobs):
+    """Count rows of jobs whose field1 and field2 values share no word either way."""
+    pairs = zip(jobs[field1], jobs[field2])
+    # A row counts only when comp() finds no overlap in both directions.
+    no_overlap = (not (comp(a, b) or comp(b, a)) for a, b in pairs)
+    return sum(1 for flag in no_overlap if flag)
+
+
+def comp(f1, f2):
+    """Return True when any word of f1 occurs as a substring of f2 (case-insensitive)."""
+    # Hyphens count as word separators, so compound titles are matched part by part.
+    words = f1.lower().replace('-', ' ').split()
+    haystack = f2.lower() if words else ''  # lowercase f2 once, and only when needed
+    return any(word in haystack for word in words)
+
+
+result = count("jobTitle", "qualification", works)
+print("Из {} людей не совпадают профессия и должность у {}".format(works.shape[0], result))
+
+# Qualifications of people whose job title contains the stem 'менедж' (manager).
+print("\nТоп образований людей для менеджеров")
+is_manager = works['jobTitle'].str.lower().str.contains('менеджер'[:-2])
+print(works[is_manager]['qualification'].str.lower().value_counts().head(5))
+
+# Engineers "by diploma": filter on qualification and rank job titles, as the heading says.
+print("\nТоп должностей людей, которые по диплому являются инженерами")
+is_engineer = works['qualification'].str.lower().str.contains('инженер'[:-2])
+print(works[is_engineer]['jobTitle'].str.lower().value_counts().head(5))
diff --git a/proj.py b/proj.py
new file mode 100644
index 0000000..fe31781
--- /dev/null
+++ b/proj.py
@@ -0,0 +1,77 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+works = pd.read_csv("works.csv")
+print(works['skills'].str.lower().str.contains('python|питон'))
+
+# First and last rows, then the row count obtained two equivalent ways.
+head = works.head(5)
+print(head)
+
+tail = works.tail(5)
+print(tail)
+print(works.shape[0])
+print(len(works.index))
+
+# Rows per gender: via a filtered frame, a summed boolean mask, and value_counts().
+print(works[works['gender'] == 'Мужской'].shape[0])
+print((works['gender'] == 'Женский').sum())
+print(works['gender'].value_counts())
+
+# Non-missing 'skills' values. info() prints its own report and returns None,
+# so it is called directly instead of being wrapped in print().
+print(works['skills'].notnull().sum())
+works.info()
+print(works['skills'].count())
+
+# The non-missing skills themselves; "skills == skills" is False for NaN.
+print(works[works['skills'].notnull()]['skills'])
+print(works['skills'].dropna())
+print(works.query("skills == skills")["skills"])
+print(works.query("salary == 15000"))
+edu = 'Высшее'
+gen = 'Женский'
+print(works.query("educationType == @edu and gender == @gen")[['salary', 'educationType', 'gender']])
+
+# na=False turns rows with missing skills into False, giving a purely boolean mask.
+mask = works["skills"].str.lower().str.contains("python|питон", na=False)
+print(works[mask]["salary"])
+
+percentiles = np.linspace(.1, 1, 10)
+
+# Salary deciles per gender. Only 'salary' is selected: the plots are labelled as
+# salary, and DataFrame.quantile over the whole frame would also process every
+# other column (and raises on text columns in pandas >= 2.0).
+gen = "Мужской"
+men_salary = works.query('gender == @gen')['salary'].quantile(percentiles)
+fig, ax = plt.subplots()
+ax.plot(percentiles, men_salary)
+plt.xlabel('Перцентили')
+plt.ylabel('Зарплата мужчин')
+plt.show()
+
+gen = "Женский"
+women_salary = works.query('gender == @gen')['salary'].quantile(percentiles)
+fig, ax = plt.subplots()
+ax.plot(percentiles, women_salary)
+plt.xlabel('Перцентили')
+plt.ylabel('Зарплата женщин')
+plt.show()
+
+# Mean salary per education type and gender. unstack() puts both genders on one
+# shared education-type index, so the paired bars always line up even when an
+# education type is missing for one gender (that bar is NaN and is not drawn).
+mean_salary = works.groupby(["educationType", "gender"])["salary"].mean().unstack("gender")
+mean_salary = mean_salary.reindex(columns=["Мужской", "Женский"])
+men = mean_salary["Мужской"].values
+women = mean_salary["Женский"].values
+
+types = mean_salary.index.values
+positions = np.arange(len(types))  # named 'positions' to avoid shadowing builtin id()
+
+plt.bar(positions - 0.2, men, 0.4, color="g", label="Средняя зарплата мужчин")
+plt.bar(positions + 0.2, women, 0.4, color="y", label="Средняя зарплата женщин")
+plt.xticks(positions, types, rotation=45)
+plt.legend()
+plt.show()
\ No newline at end of file