-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_vectors.cpp
More file actions
110 lines (102 loc) · 2.62 KB
/
get_vectors.cpp
File metadata and controls
110 lines (102 loc) · 2.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include <iostream>
#include <vector>
#include <string>
#include <sys/types.h>
#include <dirent.h>
#include <fstream>
#include <algorithm>
#include <streambuf>
// Target number of documents per class (positive / negative) that main()
// converts into vectors.
#define NUM_VECTORS 1000

using namespace std;

// Lexicon words, one vector per sentiment class.
vector<string> pos;
vector<string> neg;

// Names of the training documents found in ./train_set/pos/ and
// ./train_set/neg/ (directory entry names, without the directory prefix).
vector<string> pos_data;
vector<string> neg_data;

// Number of words loaded into `pos` and `neg`.
int num_pos = 0;
int num_neg = 0;

// Number of entries collected in `pos_data` and `neg_data`.
int num_pos_data = 0;
int num_neg_data = 0;
vector<int> get_vector(string filepath)
{
ifstream f(filepath.c_str());
vector<int> result(num_neg+num_pos);
for(int i=0;i<num_neg+num_pos;i++)
result[i]=0;
string temp;
string str((istreambuf_iterator<char>(f)),istreambuf_iterator<char>());
//cout << str << endl;
vector<string>::iterator it;
unsigned long index1 = str.find_first_of(" .,?!:;'""()$#{}][/|\\`~@%^*\n"),index0=0;
f.close();
//cout << "-------" << endl;
while(index1!=string::npos)
{
temp = str.substr(index0,index1-index0);
//cout << temp << ' ';
if((it=find(pos.begin(),pos.end(),temp))!=pos.end())
result[it-pos.begin()]++;
else if((it=find(neg.begin(),neg.end(),temp))!=neg.end())
result[num_pos+(it-neg.begin())]++;
index0=index1+1;
index1= str.find_first_of(" .,?!:;""'()$#{}][/|\\`~@%^*\n",index1+1);
}
return result;
}
// Reads whitespace-separated words from the file at `path` and appends them to
// `words`. Returns false if the file could not be opened.
static bool read_words(const char *path, std::vector<std::string> &words)
{
    std::ifstream in(path);
    if (!in)
        return false;
    std::string word;
    // Testing the extraction itself (rather than eof()) keeps a final word that
    // has no trailing whitespace and stops on any stream failure.
    while (in >> word)
        words.push_back(word);
    return true;
}

// Appends to `names` the name of every entry of directory `path` whose name
// does not start with '.'. Returns false if the directory could not be opened.
static bool list_visible_entries(const char *path, std::vector<std::string> &names)
{
    DIR *d = opendir(path);
    if (d == NULL)
        return false;
    // readdir() gives no ordering guarantee, so dot entries are filtered on
    // every iteration instead of only at the start of the listing.
    for (struct dirent *entry = readdir(d); entry != NULL; entry = readdir(d))
    {
        if (entry->d_name[0] != '.')
            names.push_back(std::string(entry->d_name));
    }
    closedir(d);
    return true;
}

// Writes one sample: the counters separated (and followed) by a space on one
// line, then `label` on the next line.
static void write_sample(std::ostream &out, const std::vector<int> &counts, const char *label)
{
    for (std::vector<int>::size_type j = 0; j < counts.size(); j++)
        out << counts[j] << ' ';
    out << '\n' << label << '\n';
}

// Loads the positive/negative lexicons, lists the training documents of both
// classes and writes their word-count vectors to ./vectors.data, alternating
// one positive sample (label 1) and one negative sample (label -1).
//
// Returns 0 on success and 1 if a lexicon, a training directory or the output
// file cannot be opened, or if writing the output fails.
int main()
{
    if (!read_words("./lexicon/pos.txt", pos) || !read_words("./lexicon/neg.txt", neg))
    {
        std::cerr << "error: cannot open ./lexicon/pos.txt or ./lexicon/neg.txt" << std::endl;
        return 1;
    }
    num_pos = static_cast<int>(pos.size());
    num_neg = static_cast<int>(neg.size());
    std::cout << num_pos << " " << num_neg << std::endl;

    //pos
    if (!list_visible_entries("./train_set/pos/", pos_data))
    {
        std::cerr << "error: cannot open directory ./train_set/pos/" << std::endl;
        return 1;
    }
    num_pos_data = static_cast<int>(pos_data.size());
    std::cout << num_pos_data << std::endl;

    //neg
    if (!list_visible_entries("./train_set/neg/", neg_data))
    {
        std::cerr << "error: cannot open directory ./train_set/neg/" << std::endl;
        return 1;
    }
    num_neg_data = static_cast<int>(neg_data.size());
    std::cout << num_neg_data << std::endl;

    // Never index past the documents that actually exist: use NUM_VECTORS
    // samples per class when available, otherwise as many as both classes have.
    int count = NUM_VECTORS;
    if (num_pos_data < count) count = num_pos_data;
    if (num_neg_data < count) count = num_neg_data;
    if (count < NUM_VECTORS)
    {
        std::cerr << "warning: only " << count << " documents per class available, expected "
                  << NUM_VECTORS << std::endl;
    }

    std::ofstream vectors("./vectors.data");
    if (!vectors)
    {
        std::cerr << "error: cannot open ./vectors.data for writing" << std::endl;
        return 1;
    }

    const int size = num_neg + num_pos;
    // Header line: total sample count, vector length, and the constant 1
    // (presumably the number of outputs per sample - confirm against the
    // consumer of this file).
    vectors << 2 * count << ' ' << size << ' ' << '1' << '\n';
    for (int i = 0; i < count; i++)
    {
        write_sample(vectors, get_vector("./train_set/pos/" + pos_data[i]), "1");
        write_sample(vectors, get_vector("./train_set/neg/" + neg_data[i]), "-1");
    }
    vectors.close();
    if (!vectors)
    {
        std::cerr << "error: failed while writing ./vectors.data" << std::endl;
        return 1;
    }

    std::cout << "vectors" << std::endl;
    return 0;
}