Ishaan Shah
committed on
Commit
·
8081d0b
1
Parent(s):
51917a0
lol pls work
Browse files
- Dockerfile +1 -1
- api.py → app.py +0 -0
- train.py +1 -21
Dockerfile
CHANGED
@@ -6,4 +6,4 @@ COPY . .
|
|
6 |
|
7 |
RUN pip install --no-cache-dir -r requirements.txt
|
8 |
|
9 |
-
CMD ["uvicorn", "
|
|
|
6 |
|
7 |
RUN pip install --no-cache-dir -r requirements.txt
|
8 |
|
9 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
|
api.py → app.py
RENAMED
File without changes
|
train.py
CHANGED
@@ -7,32 +7,12 @@ product_descriptions = pd.read_csv("./train.csv")
|
|
7 |
product_descriptions = product_descriptions.dropna()
|
8 |
|
9 |
vectorizer = TfidfVectorizer(stop_words='english')
|
10 |
-
X1 = vectorizer.fit_transform(product_descriptions["
|
11 |
|
12 |
true_k = 10
|
13 |
model = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1)
|
14 |
model.fit(X1)
|
15 |
|
16 |
-
def show_recommendations(product):
|
17 |
-
Y = vectorizer.transform([product])
|
18 |
-
prediction = model.predict(Y)
|
19 |
-
return prediction
|
20 |
-
|
21 |
-
def print_cluster(i):
|
22 |
-
for ind in order_centroids[i, :10]:
|
23 |
-
print(' %s' % terms[ind]),
|
24 |
-
|
25 |
-
def get_cluster_terms(cluster_index):
|
26 |
-
cluster_terms = [terms[ind] for ind in order_centroids[cluster_index, :10]]
|
27 |
-
return cluster_terms
|
28 |
-
|
29 |
-
order_centroids = model.cluster_centers_.argsort()[:, ::-1]
|
30 |
-
terms = vectorizer.get_feature_names_out()
|
31 |
-
|
32 |
-
print(print_cluster(show_recommendations("red dress")[0]))
|
33 |
-
print(print_cluster(show_recommendations("water")[0]))
|
34 |
-
print(print_cluster(show_recommendations("shoes")[0]))
|
35 |
-
print(print_cluster(show_recommendations("cutting tool")[0]))
|
36 |
|
37 |
pickle.dump(model, open("model.pkl", "wb"))
|
38 |
pickle.dump(vectorizer, open("vectorizer.pkl", "wb"))
|
|
|
7 |
product_descriptions = product_descriptions.dropna()
|
8 |
|
9 |
vectorizer = TfidfVectorizer(stop_words='english')
|
10 |
+
X1 = vectorizer.fit_transform(product_descriptions["product_descriptions"])
|
11 |
|
12 |
true_k = 10
|
13 |
model = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1)
|
14 |
model.fit(X1)
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
pickle.dump(model, open("model.pkl", "wb"))
|
18 |
pickle.dump(vectorizer, open("vectorizer.pkl", "wb"))
|