b3ndur committed on
Commit
11353d1
·
1 Parent(s): 6b54ff1
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ . filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from groq import Groq
3
+ import gradio as gr
4
+ import numpy as np
5
+ from sentence_transformers import SentenceTransformer
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ import sqlite3
8
+ import pandas as pd
9
+ from tqdm import tqdm
10
+
11
+ # Get the Groq API key from environment variables (in Hugging Face, this is stored as a secret)
12
# Groq API client. The key is read from the GROQ_API_KEY environment variable
# (on Hugging Face this is stored as a secret).
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Shared-cache in-memory SQLite database.
# uri=True is required for the "file:...?cache=shared" string to be parsed as
# a URI; without it sqlite3 treats the whole string as an on-disk file name.
# check_same_thread=False allows the connection to be used from other threads.
con = sqlite3.connect(
    "file::memory:?cache=shared",
    uri=True,
    check_same_thread=False,
)
con.row_factory = sqlite3.Row  # rows can be indexed by column name
cur = con.cursor()

# Create the places table if it does not exist yet.
cur.execute("""
CREATE TABLE IF NOT EXISTS places (
Place_Id INTEGER PRIMARY KEY, -- SQLite auto-increments INTEGER PRIMARY KEY automatically
Place_Name TEXT NOT NULL, -- SQLite uses TEXT instead of VARCHAR
Description TEXT,
Category TEXT,
City TEXT,
Price REAL, -- SQLite uses REAL instead of DECIMAL or FLOAT
Rating REAL,
Embedding TEXT
);
""")

data = pd.read_csv('dataset/tourism_place.csv')

# Seed the table from the CSV only when it holds no rows yet.
cur.execute("SELECT 1 FROM places LIMIT 1")
if cur.fetchone() is None:
    # Iterate the columns in lockstep instead of indexing cell by cell;
    # Price and Rating are coerced to float to match the REAL columns.
    place_rows = (
        (name, description, category, city, float(price), float(rating))
        for name, description, category, city, price, rating in zip(
            data['Place_Name'],
            data['Description'],
            data['Category'],
            data['City'],
            data['Price'],
            data['Rating'],
        )
    )
    # One executemany call replaces one execute per row; tqdm keeps the
    # progress bar the original loop displayed.
    cur.executemany(
        """
        INSERT INTO places (Place_Name, Description, Category, City, Price, Rating)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        tqdm(place_rows, total=len(data)),
    )

    # Commit the inserted rows.
    con.commit()
55
+
56
+ # Compute and store embeddings
57
# Load the sentence-embedding model once. The same instance is used for
# indexing the places below and for encoding user queries later on.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')


def compute_and_store_embeddings():
    """Embed every place and store the vector in its ``Embedding`` column.

    For each row of ``places`` the name, category, description and city are
    joined into one string and encoded with the module-level ``model``. The
    resulting vector is saved as a comma-separated string of floats, which is
    the format ``get_similar_places`` parses back.
    """
    cur.execute("SELECT Place_Id, Place_Name, Category, Description, City FROM places")
    places = cur.fetchall()
    if not places:
        # Nothing to embed; avoid calling the encoder with an empty batch.
        return

    # Combine Place_Name, Category, Description and City into one text per place.
    texts = [f"{place[1]} {place[2]} {place[3]} {place[4]}" for place in places]

    # Encode all texts in one batched call rather than one call per row.
    embeddings = model.encode(texts)

    updates = [
        (','.join(str(x) for x in embedding), place[0])
        for place, embedding in zip(places, embeddings)
    ]
    cur.executemany(
        "UPDATE places SET Embedding = ? WHERE Place_Id = ?",
        updates,
    )

    # Commit the changes to the database.
    con.commit()


# Populate the embeddings at start-up.
compute_and_store_embeddings()
88
+
89
+ # Normalize user query using Groq VM
90
def normalize_query(user_query):
    """Ask the Groq chat model to rewrite a free-form query for embedding.

    The model is asked to answer in the form
    "Place name, Category, Description, City"; the last non-empty line of its
    reply is returned.

    Args:
        user_query: The raw text typed by the user.

    Returns:
        The normalized query string. If the API call fails or the reply is
        empty, the original ``user_query`` is returned so that the similarity
        search still runs on meaningful text instead of an empty string.
    """
    prompt = f"""
    Please analyze the query: \"{user_query}\", extract Place name, Category, Description, and City.
    Return the response as: "Place name, Category, Description, City".
    """
    try:
        response = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            messages=[{"role": "user", "content": prompt}],
        )
        content = response.choices[0].message.content or ""
        # Strip first so a trailing newline does not make the "last line" empty.
        normalized_user_query = content.strip().split('\n')[-1].strip()
    except Exception as e:
        # Best-effort step: log and fall back to the raw query.
        print(f"Error normalizing query: {e}")
        return user_query
    return normalized_user_query or user_query
107
+
108
+ # Generate user embedding using Hugging Face model
109
def get_user_embedding(query):
    """Encode ``query`` with the module-level sentence-embedding model.

    Args:
        query: Text to embed.

    Returns:
        The embedding produced by ``model.encode``. If encoding fails, a zero
        vector of length 384 is returned; 384 is the embedding size this file
        assumes for the model, so the fallback stays comparable with the
        stored place embeddings.
    """
    embedding_dim = 384
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        return np.zeros(embedding_dim)
115
+
116
+ # Find similar places based on cosine similarity
117
def get_similar_places(user_embedding):
    """Rank all stored places by cosine similarity to ``user_embedding``.

    Args:
        user_embedding: 1-D query vector with the same length as the stored
            place embeddings.

    Returns:
        A list of ``(place_row, similarity)`` tuples sorted by similarity and
        then by rating, both descending. Places without a stored embedding
        are skipped; an empty list is returned when nothing can be ranked.
    """
    # Embeddings are stored as comma-separated strings; rows whose Embedding
    # is NULL or empty cannot be parsed and are left out.
    places = [
        place
        for place in cur.execute("SELECT * FROM places").fetchall()
        if place['Embedding']
    ]
    if not places:
        return []

    # Parse every stored string back into a row of one 2-D matrix so the
    # similarity is computed in a single call instead of once per place.
    embeddings = np.array([
        [float(x) for x in place['Embedding'].split(',')]
        for place in places
    ])
    scores = cosine_similarity([user_embedding], embeddings)[0]

    # Sort by similarity, then rating; a missing rating counts as 0.0 so the
    # tie-break never compares None with a float.
    return sorted(
        zip(places, scores),
        key=lambda pair: (pair[1], pair[0]['Rating'] or 0.0),
        reverse=True,
    )
135
+
136
+ # Main function to get top 5 destinations
137
def get_top_5_destinations(user_query, limit=5):
    """Return the best-matching destinations for a user query.

    The query is normalized, embedded and compared against the stored places.

    Args:
        user_query: The raw text typed by the user.
        limit: Maximum number of destinations to return (default 5). Pass
            ``None`` to return every ranked place.

    Returns:
        A list of dicts with the keys ``name``, ``city``, ``category``,
        ``rating``, ``description`` and ``similarity``, best match first, or
        the string "Tidak ada tempat yang ditemukan." when no place could be
        ranked.
    """
    normalized_query = normalize_query(user_query)
    user_embedding = get_user_embedding(normalized_query)
    similar_places = get_similar_places(user_embedding)

    if not similar_places:
        return "Tidak ada tempat yang ditemukan."

    # Only build result dicts for the places that are actually returned.
    return [
        {
            'name': place['Place_Name'],
            'city': place['City'],
            'category': place['Category'],
            'rating': place['Rating'],
            'description': place['Description'],
            'similarity': similarity,
        }
        for place, similarity in similar_places[:limit]
    ]
157
+
158
+ # Generate response to user using Groq VM
159
def generate_response(user_query, top_places):
    """Ask the Groq chat model to present the recommendations to the user.

    Args:
        user_query: The user's original query (currently not sent to the
            model; kept for interface compatibility).
        top_places: Iterable of dicts with the keys ``name``, ``city``,
            ``category``, ``rating`` and ``description``.

    Returns:
        The model's reply text, or an apology message in Bahasa Indonesia if
        anything goes wrong while building the prompt or calling the API.
    """
    import json  # local import so this block does not depend on file-level edits

    try:
        # Serialize with json.dumps so quotes, backslashes and newlines inside
        # names or descriptions are escaped and the payload is valid JSON.
        fields = ("name", "city", "category", "rating", "description")
        destinations_data = json.dumps(
            [{field: place[field] for field in fields} for place in top_places],
            ensure_ascii=False,
        )

        # System prompt: restricts the model to presenting the supplied data.
        system_prompt = """
        You are a tour guide assistant. Your task is to present the following tourism recommendations to the user in Bahasa Indonesia.
        - For each destination, include the name, city, category, rating, and a short description.
        - Do not provide any additional commentary.
        - Only return the provided data in a clear and concise format.
        """

        response = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Berikut adalah rekomendasi berdasarkan data: {destinations_data}"},
            ],
        )

        # Return the text generated by the model.
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."
189
+
190
+ # Gradio Interface - User Input and Output
191
def chatbot(user_query):
    """Gradio handler: turn a user query into a recommendation message.

    Looks up the matching destinations and, unless the lookup reported a
    problem, has the chat model phrase the first five of them.
    """
    candidates = get_top_5_destinations(user_query)

    # A plain string from the lookup is a ready-made message
    # (e.g. no places found); pass it straight through.
    if not isinstance(candidates, str):
        return generate_response(user_query, candidates[:5])
    return candidates
205
+
206
+ # Define Gradio Interface
207
# Text-in / text-out Gradio UI backed by chatbot().
iface = gr.Interface(
    chatbot,
    "text",
    "text",
    title="Tourism Recommendation Chatbot",
    description="Masukkan pertanyaan wisata Anda dan dapatkan rekomendasi tempat terbaik!",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=True)
dataset/tourism_place.csv ADDED
The diff for this file is too large to render. See raw diff
 
dataset/user.csv ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ User_Id,Location,Age
2
+ 1,"Semarang, Jawa Tengah",20
3
+ 2,"Bekasi, Jawa Barat",21
4
+ 3,"Cirebon, Jawa Barat",23
5
+ 4,"Bekasi, Jawa Barat",21
6
+ 5,"Lampung, Sumatera Selatan",20
7
+ 6,"Jakarta Utara, DKI Jakarta",18
8
+ 7,"Jakarta Selatan, DKI Jakarta",39
9
+ 8,"Bandung, Jawa Barat",40
10
+ 9,"Surabaya, Jawa Timur",38
11
+ 10,"Bekasi, Jawa Barat",39
12
+ 11,"Yogyakarta, DIY",20
13
+ 12,"Bogor, Jawa Barat",37
14
+ 13,"Depok, Jawa Barat",18
15
+ 14,"Jakarta Pusat, DKI Jakarta",26
16
+ 15,"Jakarta Timur, DKI Jakarta",34
17
+ 16,"Bekasi, Jawa Barat",40
18
+ 17,"Semarang, Jawa Tengah",31
19
+ 18,"Yogyakarta, DIY",39
20
+ 19,"Cirebon, Jawa Barat",26
21
+ 20,"Lampung, Sumatera Selatan",30
22
+ 21,"Semarang, Jawa Tengah",30
23
+ 22,"Subang, Jawa Barat",25
24
+ 23,"Jakarta Barat, DKI Jakarta",37
25
+ 24,"Bekasi, Jawa Barat",36
26
+ 25,"Lampung, Sumatera Selatan",40
27
+ 26,"Palembang, Sumatera Selatan",38
28
+ 27,"Bogor, Jawa Barat",22
29
+ 28,"Sragen, Jawa Tengah",38
30
+ 29,"Ponorogo, Jawa Timur",34
31
+ 30,"Depok, Jawa Barat",30
32
+ 31,"Jakarta Selatan, DKI Jakarta",26
33
+ 32,"Bekasi, Jawa Barat",34
34
+ 33,"Ponorogo, Jawa Timur",24
35
+ 34,"Sragen, Jawa Tengah",31
36
+ 35,"Klaten, Jawa Tengah",24
37
+ 36,"Solo, Jawa Tengah",20
38
+ 37,"Tanggerang, Banten",21
39
+ 38,"Serang, Banten",26
40
+ 39,"Cilacap, Jawa Tengah",32
41
+ 40,"Semarang, Jawa Tengah",27
42
+ 41,"Yogyakarta, DIY",24
43
+ 42,"Kota Gede, DIY",37
44
+ 43,"Cirebon, Jawa Barat",33
45
+ 44,"Subang, Jawa Barat",21
46
+ 45,"Karawang, Jawa Barat",25
47
+ 46,"Purwakarat, Jawa Barat",38
48
+ 47,"Jakarta Utara, DKI Jakarta",31
49
+ 48,"Jakarta Selatan, DKI Jakarta",35
50
+ 49,"Jakarta Pusat, DKI Jakarta",24
51
+ 50,"Bekasi, Jawa Barat",19
52
+ 51,"Bogor, Jawa Barat",29
53
+ 52,"Karawang, Jawa Barat",18
54
+ 53,"Subang, Jawa Barat",28
55
+ 54,"Serang, Banten",31
56
+ 55,"Tanggerang, Banten",19
57
+ 56,"Kota Gede, DIY",18
58
+ 57,"Yogyakarta, DIY",21
59
+ 58,"Lampung, Sumatera Selatan",21
60
+ 59,"Ponorogo, Jawa Timur",26
61
+ 60,"Surabaya, Jawa Timur",25
62
+ 61,"Nganjuk, Jawa Timur",23
63
+ 62,"Madura, Jawa Timur",36
64
+ 63,"Semarang, Jawa Tengah",35
65
+ 64,"Bandung, Jawa Barat",38
66
+ 65,"Yogyakarta, DIY",22
67
+ 66,"Bekasi, Jawa Barat",33
68
+ 67,"Surabaya, Jawa Timur",25
69
+ 68,"Kota Gede, DIY",37
70
+ 69,"Bekasi, Jawa Barat",24
71
+ 70,"Bogor, Jawa Barat",21
72
+ 71,"Depok, Jawa Barat",39
73
+ 72,"Tanggerang, Banten",19
74
+ 73,"Serang, Banten",29
75
+ 74,"Semarang, Jawa Tengah",30
76
+ 75,"Bekasi, Jawa Barat",19
77
+ 76,"Cirebon, Jawa Barat",19
78
+ 77,"Bekasi, Jawa Barat",34
79
+ 78,"Lampung, Sumatera Selatan",22
80
+ 79,"Jakarta Utara, DKI Jakarta",31
81
+ 80,"Jakarta Selatan, DKI Jakarta",32
82
+ 81,"Bandung, Jawa Barat",29
83
+ 82,"Surabaya, Jawa Timur",24
84
+ 83,"Bekasi, Jawa Barat",32
85
+ 84,"Yogyakarta, DIY",35
86
+ 85,"Bogor, Jawa Barat",31
87
+ 86,"Depok, Jawa Barat",32
88
+ 87,"Jakarta Pusat, DKI Jakarta",30
89
+ 88,"Jakarta Timur, DKI Jakarta",27
90
+ 89,"Bekasi, Jawa Barat",35
91
+ 90,"Semarang, Jawa Tengah",29
92
+ 91,"Yogyakarta, DIY",32
93
+ 92,"Cirebon, Jawa Barat",39
94
+ 93,"Lampung, Sumatera Selatan",18
95
+ 94,"Semarang, Jawa Tengah",34
96
+ 95,"Subang, Jawa Barat",27
97
+ 96,"Jakarta Barat, DKI Jakarta",19
98
+ 97,"Bekasi, Jawa Barat",38
99
+ 98,"Lampung, Sumatera Selatan",28
100
+ 99,"Palembang, Sumatera Selatan",33
101
+ 100,"Bogor, Jawa Barat",28
102
+ 101,"Sragen, Jawa Tengah",39
103
+ 102,"Ponorogo, Jawa Timur",21
104
+ 103,"Depok, Jawa Barat",30
105
+ 104,"Jakarta Selatan, DKI Jakarta",27
106
+ 105,"Bekasi, Jawa Barat",37
107
+ 106,"Ponorogo, Jawa Timur",32
108
+ 107,"Sragen, Jawa Tengah",24
109
+ 108,"Klaten, Jawa Tengah",33
110
+ 109,"Solo, Jawa Tengah",18
111
+ 110,"Tanggerang, Banten",39
112
+ 111,"Serang, Banten",38
113
+ 112,"Cilacap, Jawa Tengah",18
114
+ 113,"Semarang, Jawa Tengah",38
115
+ 114,"Yogyakarta, DIY",39
116
+ 115,"Kota Gede, DIY",26
117
+ 116,"Cirebon, Jawa Barat",23
118
+ 117,"Subang, Jawa Barat",30
119
+ 118,"Karawang, Jawa Barat",32
120
+ 119,"Purwakarat, Jawa Barat",25
121
+ 120,"Jakarta Utara, DKI Jakarta",32
122
+ 121,"Jakarta Selatan, DKI Jakarta",35
123
+ 122,"Jakarta Pusat, DKI Jakarta",24
124
+ 123,"Bekasi, Jawa Barat",32
125
+ 124,"Bogor, Jawa Barat",22
126
+ 125,"Karawang, Jawa Barat",24
127
+ 126,"Subang, Jawa Barat",32
128
+ 127,"Serang, Banten",38
129
+ 128,"Tanggerang, Banten",28
130
+ 129,"Kota Gede, DIY",29
131
+ 130,"Yogyakarta, DIY",27
132
+ 131,"Lampung, Sumatera Selatan",21
133
+ 132,"Ponorogo, Jawa Timur",31
134
+ 133,"Surabaya, Jawa Timur",29
135
+ 134,"Nganjuk, Jawa Timur",27
136
+ 135,"Madura, Jawa Timur",22
137
+ 136,"Semarang, Jawa Tengah",35
138
+ 137,"Bandung, Jawa Barat",28
139
+ 138,"Yogyakarta, DIY",25
140
+ 139,"Bekasi, Jawa Barat",33
141
+ 140,"Surabaya, Jawa Timur",25
142
+ 141,"Kota Gede, DIY",34
143
+ 142,"Bekasi, Jawa Barat",29
144
+ 143,"Bogor, Jawa Barat",25
145
+ 144,"Depok, Jawa Barat",18
146
+ 145,"Tanggerang, Banten",37
147
+ 146,"Serang, Banten",38
148
+ 147,"Semarang, Jawa Tengah",18
149
+ 148,"Bekasi, Jawa Barat",33
150
+ 149,"Cirebon, Jawa Barat",30
151
+ 150,"Bekasi, Jawa Barat",40
152
+ 151,"Lampung, Sumatera Selatan",36
153
+ 152,"Jakarta Utara, DKI Jakarta",25
154
+ 153,"Jakarta Selatan, DKI Jakarta",39
155
+ 154,"Bandung, Jawa Barat",31
156
+ 155,"Surabaya, Jawa Timur",37
157
+ 156,"Bekasi, Jawa Barat",31
158
+ 157,"Yogyakarta, DIY",28
159
+ 158,"Bogor, Jawa Barat",23
160
+ 159,"Depok, Jawa Barat",20
161
+ 160,"Jakarta Pusat, DKI Jakarta",36
162
+ 161,"Jakarta Timur, DKI Jakarta",33
163
+ 162,"Bekasi, Jawa Barat",25
164
+ 163,"Semarang, Jawa Tengah",40
165
+ 164,"Yogyakarta, DIY",19
166
+ 165,"Cirebon, Jawa Barat",23
167
+ 166,"Lampung, Sumatera Selatan",26
168
+ 167,"Semarang, Jawa Tengah",19
169
+ 168,"Subang, Jawa Barat",38
170
+ 169,"Jakarta Barat, DKI Jakarta",18
171
+ 170,"Bekasi, Jawa Barat",19
172
+ 171,"Lampung, Sumatera Selatan",33
173
+ 172,"Palembang, Sumatera Selatan",34
174
+ 173,"Bogor, Jawa Barat",29
175
+ 174,"Sragen, Jawa Tengah",32
176
+ 175,"Ponorogo, Jawa Timur",36
177
+ 176,"Depok, Jawa Barat",31
178
+ 177,"Jakarta Selatan, DKI Jakarta",35
179
+ 178,"Bekasi, Jawa Barat",33
180
+ 179,"Ponorogo, Jawa Timur",27
181
+ 180,"Sragen, Jawa Tengah",24
182
+ 181,"Klaten, Jawa Tengah",29
183
+ 182,"Solo, Jawa Tengah",34
184
+ 183,"Tanggerang, Banten",37
185
+ 184,"Serang, Banten",30
186
+ 185,"Cilacap, Jawa Tengah",33
187
+ 186,"Semarang, Jawa Tengah",28
188
+ 187,"Yogyakarta, DIY",25
189
+ 188,"Kota Gede, DIY",37
190
+ 189,"Cirebon, Jawa Barat",21
191
+ 190,"Subang, Jawa Barat",18
192
+ 191,"Karawang, Jawa Barat",32
193
+ 192,"Purwakarat, Jawa Barat",37
194
+ 193,"Jakarta Utara, DKI Jakarta",20
195
+ 194,"Jakarta Selatan, DKI Jakarta",31
196
+ 195,"Jakarta Pusat, DKI Jakarta",29
197
+ 196,"Bekasi, Jawa Barat",28
198
+ 197,"Bogor, Jawa Barat",27
199
+ 198,"Karawang, Jawa Barat",26
200
+ 199,"Subang, Jawa Barat",18
201
+ 200,"Serang, Banten",34
202
+ 201,"Bekasi, Jawa Barat",21
203
+ 202,"Lampung, Sumatera Selatan",21
204
+ 203,"Jakarta Utara, DKI Jakarta",24
205
+ 204,"Jakarta Selatan, DKI Jakarta",21
206
+ 205,"Bandung, Jawa Barat",24
207
+ 206,"Surabaya, Jawa Timur",30
208
+ 207,"Bekasi, Jawa Barat",20
209
+ 208,"Yogyakarta, DIY",27
210
+ 209,"Bogor, Jawa Barat",19
211
+ 210,"Depok, Jawa Barat",18
212
+ 211,"Jakarta Pusat, DKI Jakarta",22
213
+ 212,"Jakarta Timur, DKI Jakarta",28
214
+ 213,"Bekasi, Jawa Barat",20
215
+ 214,"Semarang, Jawa Tengah",30
216
+ 215,"Yogyakarta, DIY",23
217
+ 216,"Cirebon, Jawa Barat",28
218
+ 217,"Lampung, Sumatera Selatan",20
219
+ 218,"Semarang, Jawa Tengah",29
220
+ 219,"Subang, Jawa Barat",24
221
+ 220,"Jakarta Barat, DKI Jakarta",23
222
+ 221,"Bekasi, Jawa Barat",30
223
+ 222,"Lampung, Sumatera Selatan",30
224
+ 223,"Palembang, Sumatera Selatan",20
225
+ 224,"Bogor, Jawa Barat",28
226
+ 225,"Sragen, Jawa Tengah",19
227
+ 226,"Ponorogo, Jawa Timur",23
228
+ 227,"Depok, Jawa Barat",25
229
+ 228,"Jakarta Selatan, DKI Jakarta",18
230
+ 229,"Bekasi, Jawa Barat",20
231
+ 230,"Ponorogo, Jawa Timur",20
232
+ 231,"Sragen, Jawa Tengah",28
233
+ 232,"Klaten, Jawa Tengah",30
234
+ 233,"Solo, Jawa Tengah",23
235
+ 234,"Tanggerang, Banten",27
236
+ 235,"Serang, Banten",18
237
+ 236,"Cilacap, Jawa Tengah",23
238
+ 237,"Semarang, Jawa Tengah",20
239
+ 238,"Yogyakarta, DIY",30
240
+ 239,"Kota Gede, DIY",23
241
+ 240,"Cirebon, Jawa Barat",22
242
+ 241,"Subang, Jawa Barat",20
243
+ 242,"Karawang, Jawa Barat",20
244
+ 243,"Purwakarat, Jawa Barat",20
245
+ 244,"Jakarta Utara, DKI Jakarta",27
246
+ 245,"Jakarta Selatan, DKI Jakarta",27
247
+ 246,"Jakarta Pusat, DKI Jakarta",28
248
+ 247,"Bekasi, Jawa Barat",28
249
+ 248,"Bogor, Jawa Barat",29
250
+ 249,"Karawang, Jawa Barat",19
251
+ 250,"Subang, Jawa Barat",27
252
+ 251,"Semarang, Jawa Tengah",29
253
+ 252,"Bekasi, Jawa Barat",30
254
+ 253,"Cirebon, Jawa Barat",34
255
+ 254,"Bekasi, Jawa Barat",34
256
+ 255,"Lampung, Sumatera Selatan",30
257
+ 256,"Jakarta Utara, DKI Jakarta",39
258
+ 257,"Jakarta Selatan, DKI Jakarta",25
259
+ 258,"Bandung, Jawa Barat",38
260
+ 259,"Surabaya, Jawa Timur",27
261
+ 260,"Bekasi, Jawa Barat",33
262
+ 261,"Yogyakarta, DIY",38
263
+ 262,"Bogor, Jawa Barat",38
264
+ 263,"Depok, Jawa Barat",27
265
+ 264,"Jakarta Pusat, DKI Jakarta",37
266
+ 265,"Jakarta Timur, DKI Jakarta",30
267
+ 266,"Bekasi, Jawa Barat",32
268
+ 267,"Semarang, Jawa Tengah",31
269
+ 268,"Yogyakarta, DIY",32
270
+ 269,"Cirebon, Jawa Barat",36
271
+ 270,"Lampung, Sumatera Selatan",25
272
+ 271,"Semarang, Jawa Tengah",37
273
+ 272,"Subang, Jawa Barat",30
274
+ 273,"Jakarta Barat, DKI Jakarta",33
275
+ 274,"Bekasi, Jawa Barat",38
276
+ 275,"Lampung, Sumatera Selatan",34
277
+ 276,"Lampung, Sumatera Selatan",39
278
+ 277,"Jakarta Utara, DKI Jakarta",29
279
+ 278,"Jakarta Selatan, DKI Jakarta",40
280
+ 279,"Bandung, Jawa Barat",28
281
+ 280,"Surabaya, Jawa Timur",40
282
+ 281,"Bekasi, Jawa Barat",30
283
+ 282,"Yogyakarta, DIY",39
284
+ 283,"Bogor, Jawa Barat",37
285
+ 284,"Depok, Jawa Barat",29
286
+ 285,"Jakarta Pusat, DKI Jakarta",29
287
+ 286,"Jakarta Timur, DKI Jakarta",35
288
+ 287,"Bekasi, Jawa Barat",27
289
+ 288,"Semarang, Jawa Tengah",30
290
+ 289,"Yogyakarta, DIY",35
291
+ 290,"Cirebon, Jawa Barat",34
292
+ 291,"Lampung, Sumatera Selatan",25
293
+ 292,"Semarang, Jawa Tengah",29
294
+ 293,"Subang, Jawa Barat",34
295
+ 294,"Jakarta Barat, DKI Jakarta",28
296
+ 295,"Bekasi, Jawa Barat",31
297
+ 296,"Lampung, Sumatera Selatan",31
298
+ 297,"Palembang, Sumatera Selatan",39
299
+ 298,"Bogor, Jawa Barat",38
300
+ 299,"Sragen, Jawa Tengah",27
301
+ 300,"Ponorogo, Jawa Timur",26
exploration-LlamaRecommender.ipynb ADDED
@@ -0,0 +1,844 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "Berhasil terkoneksi ke MySQL Server\n",
13
+ "Database 'tourism_destination' berhasil dibuat!\n",
14
+ "Koneksi ke MySQL ditutup\n"
15
+ ]
16
+ }
17
+ ],
18
+ "source": [
19
+ "import mysql.connector\n",
20
+ "from mysql.connector import Error\n",
21
+ "\n",
22
+ "# Fungsi untuk membuat koneksi ke MySQL dan membuat database\n",
23
+ "def create_database(host_name, user_name, user_password, db_name):\n",
24
+ " try:\n",
25
+ " # Koneksi ke server MySQL\n",
26
+ " connection = mysql.connector.connect(\n",
27
+ " host=host_name,\n",
28
+ " user=user_name,\n",
29
+ " password=user_password\n",
30
+ " )\n",
31
+ " \n",
32
+ " if connection.is_connected():\n",
33
+ " print(\"Berhasil terkoneksi ke MySQL Server\")\n",
34
+ " cursor = connection.cursor()\n",
35
+ " # Membuat database baru\n",
36
+ " cursor.execute(f\"CREATE DATABASE {db_name}\")\n",
37
+ " print(f\"Database '{db_name}' berhasil dibuat!\")\n",
38
+ " \n",
39
+ " except Error as e:\n",
40
+ " print(f\"Error: '{e}' terjadi\")\n",
41
+ " \n",
42
+ " finally:\n",
43
+ " # Menutup koneksi\n",
44
+ " if connection.is_connected():\n",
45
+ " cursor.close()\n",
46
+ " connection.close()\n",
47
+ " print(\"Koneksi ke MySQL ditutup\")\n",
48
+ "\n",
49
+ "# Contoh penggunaan\n",
50
+ "host = \"localhost\"\n",
51
+ "user = \"root\"\n",
52
+ "password = \"admin123\"\n",
53
+ "database_name = \"tourism_destination\"\n",
54
+ "\n",
55
+ "create_database(host, user, password, database_name)"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 2,
61
+ "metadata": {},
62
+ "outputs": [
63
+ {
64
+ "name": "stdout",
65
+ "output_type": "stream",
66
+ "text": [
67
+ "Berhasil terkoneksi ke database 'tourism_destination'\n",
68
+ "Tabel 'places' berhasil dibuat!\n",
69
+ "Koneksi ke MySQL ditutup\n"
70
+ ]
71
+ }
72
+ ],
73
+ "source": [
74
+ "def create_table(host_name, user_name, user_password, db_name):\n",
75
+ " try:\n",
76
+ " # Koneksi ke MySQL dan pilih database\n",
77
+ " connection = mysql.connector.connect(\n",
78
+ " host=host_name,\n",
79
+ " user=user_name,\n",
80
+ " password=user_password,\n",
81
+ " database=db_name\n",
82
+ " )\n",
83
+ " \n",
84
+ " if connection.is_connected():\n",
85
+ " print(f\"Berhasil terkoneksi ke database '{db_name}'\")\n",
86
+ " cursor = connection.cursor()\n",
87
+ " \n",
88
+ " # Membuat tabel dengan kolom sesuai format yang diberikan\n",
89
+ " create_table_query = \"\"\"\n",
90
+ " CREATE TABLE places (\n",
91
+ " Place_Id INT AUTO_INCREMENT PRIMARY KEY,\n",
92
+ " Place_Name VARCHAR(255) NOT NULL,\n",
93
+ " Description TEXT,\n",
94
+ " Category VARCHAR(100),\n",
95
+ " City VARCHAR(100),\n",
96
+ " Price DECIMAL(10, 2), \n",
97
+ " Rating FLOAT \n",
98
+ " );\n",
99
+ " \"\"\"\n",
100
+ " cursor.execute(create_table_query)\n",
101
+ " print(\"Tabel 'places' berhasil dibuat!\")\n",
102
+ " \n",
103
+ " except Error as e:\n",
104
+ " print(f\"Error: '{e}' terjadi\")\n",
105
+ " \n",
106
+ " finally:\n",
107
+ " # Menutup koneksi\n",
108
+ " if connection.is_connected():\n",
109
+ " cursor.close()\n",
110
+ " connection.close()\n",
111
+ " print(\"Koneksi ke MySQL ditutup\")\n",
112
+ "\n",
113
+ "# Contoh penggunaan\n",
114
+ "host = \"localhost\"\n",
115
+ "user = \"root\"\n",
116
+ "password = \"admin123\"\n",
117
+ "database_name = \"tourism_destination\"\n",
118
+ "\n",
119
+ "create_table(host, user, password, database_name)"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": 4,
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": [
128
+ "import pandas as pd"
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "execution_count": 6,
134
+ "metadata": {},
135
+ "outputs": [
136
+ {
137
+ "data": {
138
+ "text/html": [
139
+ "<div>\n",
140
+ "<style scoped>\n",
141
+ " .dataframe tbody tr th:only-of-type {\n",
142
+ " vertical-align: middle;\n",
143
+ " }\n",
144
+ "\n",
145
+ " .dataframe tbody tr th {\n",
146
+ " vertical-align: top;\n",
147
+ " }\n",
148
+ "\n",
149
+ " .dataframe thead th {\n",
150
+ " text-align: right;\n",
151
+ " }\n",
152
+ "</style>\n",
153
+ "<table border=\"1\" class=\"dataframe\">\n",
154
+ " <thead>\n",
155
+ " <tr style=\"text-align: right;\">\n",
156
+ " <th></th>\n",
157
+ " <th>Place_Id</th>\n",
158
+ " <th>Place_Name</th>\n",
159
+ " <th>Description</th>\n",
160
+ " <th>Category</th>\n",
161
+ " <th>City</th>\n",
162
+ " <th>Price</th>\n",
163
+ " <th>Rating</th>\n",
164
+ " <th>Time_Minutes</th>\n",
165
+ " <th>Coordinate</th>\n",
166
+ " <th>Lat</th>\n",
167
+ " <th>Long</th>\n",
168
+ " <th>Unnamed: 11</th>\n",
169
+ " <th>Unnamed: 12</th>\n",
170
+ " </tr>\n",
171
+ " </thead>\n",
172
+ " <tbody>\n",
173
+ " <tr>\n",
174
+ " <th>0</th>\n",
175
+ " <td>1</td>\n",
176
+ " <td>Monumen Nasional</td>\n",
177
+ " <td>Monumen Nasional atau yang populer disingkat d...</td>\n",
178
+ " <td>Budaya</td>\n",
179
+ " <td>Jakarta</td>\n",
180
+ " <td>20000</td>\n",
181
+ " <td>4.6</td>\n",
182
+ " <td>15.0</td>\n",
183
+ " <td>{'lat': -6.1753924, 'lng': 106.8271528}</td>\n",
184
+ " <td>-6.175392</td>\n",
185
+ " <td>106.827153</td>\n",
186
+ " <td>NaN</td>\n",
187
+ " <td>1</td>\n",
188
+ " </tr>\n",
189
+ " <tr>\n",
190
+ " <th>1</th>\n",
191
+ " <td>2</td>\n",
192
+ " <td>Kota Tua</td>\n",
193
+ " <td>Kota tua di Jakarta, yang juga bernama Kota Tu...</td>\n",
194
+ " <td>Budaya</td>\n",
195
+ " <td>Jakarta</td>\n",
196
+ " <td>0</td>\n",
197
+ " <td>4.6</td>\n",
198
+ " <td>90.0</td>\n",
199
+ " <td>{'lat': -6.137644799999999, 'lng': 106.8171245}</td>\n",
200
+ " <td>-6.137645</td>\n",
201
+ " <td>106.817125</td>\n",
202
+ " <td>NaN</td>\n",
203
+ " <td>2</td>\n",
204
+ " </tr>\n",
205
+ " <tr>\n",
206
+ " <th>2</th>\n",
207
+ " <td>3</td>\n",
208
+ " <td>Dunia Fantasi</td>\n",
209
+ " <td>Dunia Fantasi atau disebut juga Dufan adalah t...</td>\n",
210
+ " <td>Taman Hiburan</td>\n",
211
+ " <td>Jakarta</td>\n",
212
+ " <td>270000</td>\n",
213
+ " <td>4.6</td>\n",
214
+ " <td>360.0</td>\n",
215
+ " <td>{'lat': -6.125312399999999, 'lng': 106.8335377}</td>\n",
216
+ " <td>-6.125312</td>\n",
217
+ " <td>106.833538</td>\n",
218
+ " <td>NaN</td>\n",
219
+ " <td>3</td>\n",
220
+ " </tr>\n",
221
+ " <tr>\n",
222
+ " <th>3</th>\n",
223
+ " <td>4</td>\n",
224
+ " <td>Taman Mini Indonesia Indah (TMII)</td>\n",
225
+ " <td>Taman Mini Indonesia Indah merupakan suatu kaw...</td>\n",
226
+ " <td>Taman Hiburan</td>\n",
227
+ " <td>Jakarta</td>\n",
228
+ " <td>10000</td>\n",
229
+ " <td>4.5</td>\n",
230
+ " <td>NaN</td>\n",
231
+ " <td>{'lat': -6.302445899999999, 'lng': 106.8951559}</td>\n",
232
+ " <td>-6.302446</td>\n",
233
+ " <td>106.895156</td>\n",
234
+ " <td>NaN</td>\n",
235
+ " <td>4</td>\n",
236
+ " </tr>\n",
237
+ " <tr>\n",
238
+ " <th>4</th>\n",
239
+ " <td>5</td>\n",
240
+ " <td>Atlantis Water Adventure</td>\n",
241
+ " <td>Atlantis Water Adventure atau dikenal dengan A...</td>\n",
242
+ " <td>Taman Hiburan</td>\n",
243
+ " <td>Jakarta</td>\n",
244
+ " <td>94000</td>\n",
245
+ " <td>4.5</td>\n",
246
+ " <td>60.0</td>\n",
247
+ " <td>{'lat': -6.12419, 'lng': 106.839134}</td>\n",
248
+ " <td>-6.124190</td>\n",
249
+ " <td>106.839134</td>\n",
250
+ " <td>NaN</td>\n",
251
+ " <td>5</td>\n",
252
+ " </tr>\n",
253
+ " </tbody>\n",
254
+ "</table>\n",
255
+ "</div>"
256
+ ],
257
+ "text/plain": [
258
+ " Place_Id Place_Name \\\n",
259
+ "0 1 Monumen Nasional \n",
260
+ "1 2 Kota Tua \n",
261
+ "2 3 Dunia Fantasi \n",
262
+ "3 4 Taman Mini Indonesia Indah (TMII) \n",
263
+ "4 5 Atlantis Water Adventure \n",
264
+ "\n",
265
+ " Description Category City \\\n",
266
+ "0 Monumen Nasional atau yang populer disingkat d... Budaya Jakarta \n",
267
+ "1 Kota tua di Jakarta, yang juga bernama Kota Tu... Budaya Jakarta \n",
268
+ "2 Dunia Fantasi atau disebut juga Dufan adalah t... Taman Hiburan Jakarta \n",
269
+ "3 Taman Mini Indonesia Indah merupakan suatu kaw... Taman Hiburan Jakarta \n",
270
+ "4 Atlantis Water Adventure atau dikenal dengan A... Taman Hiburan Jakarta \n",
271
+ "\n",
272
+ " Price Rating Time_Minutes \\\n",
273
+ "0 20000 4.6 15.0 \n",
274
+ "1 0 4.6 90.0 \n",
275
+ "2 270000 4.6 360.0 \n",
276
+ "3 10000 4.5 NaN \n",
277
+ "4 94000 4.5 60.0 \n",
278
+ "\n",
279
+ " Coordinate Lat Long \\\n",
280
+ "0 {'lat': -6.1753924, 'lng': 106.8271528} -6.175392 106.827153 \n",
281
+ "1 {'lat': -6.137644799999999, 'lng': 106.8171245} -6.137645 106.817125 \n",
282
+ "2 {'lat': -6.125312399999999, 'lng': 106.8335377} -6.125312 106.833538 \n",
283
+ "3 {'lat': -6.302445899999999, 'lng': 106.8951559} -6.302446 106.895156 \n",
284
+ "4 {'lat': -6.12419, 'lng': 106.839134} -6.124190 106.839134 \n",
285
+ "\n",
286
+ " Unnamed: 11 Unnamed: 12 \n",
287
+ "0 NaN 1 \n",
288
+ "1 NaN 2 \n",
289
+ "2 NaN 3 \n",
290
+ "3 NaN 4 \n",
291
+ "4 NaN 5 "
292
+ ]
293
+ },
294
+ "execution_count": 6,
295
+ "metadata": {},
296
+ "output_type": "execute_result"
297
+ }
298
+ ],
299
+ "source": [
300
+ "data = pd.read_csv(r'dataset_recommendation_tourism\\tourism_with_id.csv')\n",
301
+ "data.head()"
302
+ ]
303
+ },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": 7,
307
+ "metadata": {},
308
+ "outputs": [
309
+ {
310
+ "data": {
311
+ "text/plain": [
312
+ "Index(['Place_Id', 'Place_Name', 'Description', 'Category', 'City', 'Price',\n",
313
+ " 'Rating', 'Time_Minutes', 'Coordinate', 'Lat', 'Long', 'Unnamed: 11',\n",
314
+ " 'Unnamed: 12'],\n",
315
+ " dtype='object')"
316
+ ]
317
+ },
318
+ "execution_count": 7,
319
+ "metadata": {},
320
+ "output_type": "execute_result"
321
+ }
322
+ ],
323
+ "source": [
324
+ "data.columns"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": 8,
330
+ "metadata": {},
331
+ "outputs": [
332
+ {
333
+ "data": {
334
+ "text/plain": [
335
+ "Index(['Place_Id', 'Place_Name', 'Description', 'Category', 'City', 'Price',\n",
336
+ " 'Rating'],\n",
337
+ " dtype='object')"
338
+ ]
339
+ },
340
+ "execution_count": 8,
341
+ "metadata": {},
342
+ "output_type": "execute_result"
343
+ }
344
+ ],
345
+ "source": [
346
+ "data = data.drop(['Time_Minutes', 'Coordinate',\n",
347
+ " 'Lat', 'Long', 'Unnamed: 11',\n",
348
+ " 'Unnamed: 12'], axis=1)\n",
349
+ "data.columns"
350
+ ]
351
+ },
352
+ {
353
+ "cell_type": "code",
354
+ "execution_count": 9,
355
+ "metadata": {},
356
+ "outputs": [],
357
+ "source": [
358
+ "data.to_csv('tourism_place.csv')"
359
+ ]
360
+ },
361
+ {
362
+ "cell_type": "code",
363
+ "execution_count": 10,
364
+ "metadata": {},
365
+ "outputs": [
366
+ {
367
+ "name": "stdout",
368
+ "output_type": "stream",
369
+ "text": [
370
+ "<class 'pandas.core.frame.DataFrame'>\n",
371
+ "RangeIndex: 437 entries, 0 to 436\n",
372
+ "Data columns (total 7 columns):\n",
373
+ " # Column Non-Null Count Dtype \n",
374
+ "--- ------ -------------- ----- \n",
375
+ " 0 Place_Id 437 non-null int64 \n",
376
+ " 1 Place_Name 437 non-null object \n",
377
+ " 2 Description 437 non-null object \n",
378
+ " 3 Category 437 non-null object \n",
379
+ " 4 City 437 non-null object \n",
380
+ " 5 Price 437 non-null int64 \n",
381
+ " 6 Rating 437 non-null float64\n",
382
+ "dtypes: float64(1), int64(2), object(4)\n",
383
+ "memory usage: 24.0+ KB\n"
384
+ ]
385
+ }
386
+ ],
387
+ "source": [
388
+ "data.info()"
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "code",
393
+ "execution_count": 11,
394
+ "metadata": {},
395
+ "outputs": [
396
+ {
397
+ "data": {
398
+ "text/html": [
399
+ "<div>\n",
400
+ "<style scoped>\n",
401
+ " .dataframe tbody tr th:only-of-type {\n",
402
+ " vertical-align: middle;\n",
403
+ " }\n",
404
+ "\n",
405
+ " .dataframe tbody tr th {\n",
406
+ " vertical-align: top;\n",
407
+ " }\n",
408
+ "\n",
409
+ " .dataframe thead th {\n",
410
+ " text-align: right;\n",
411
+ " }\n",
412
+ "</style>\n",
413
+ "<table border=\"1\" class=\"dataframe\">\n",
414
+ " <thead>\n",
415
+ " <tr style=\"text-align: right;\">\n",
416
+ " <th></th>\n",
417
+ " <th>Place_Id</th>\n",
418
+ " <th>Place_Name</th>\n",
419
+ " <th>Description</th>\n",
420
+ " <th>Category</th>\n",
421
+ " <th>City</th>\n",
422
+ " <th>Price</th>\n",
423
+ " <th>Rating</th>\n",
424
+ " </tr>\n",
425
+ " </thead>\n",
426
+ " <tbody>\n",
427
+ " <tr>\n",
428
+ " <th>0</th>\n",
429
+ " <td>1</td>\n",
430
+ " <td>Monumen Nasional</td>\n",
431
+ " <td>Monumen Nasional atau yang populer disingkat d...</td>\n",
432
+ " <td>Budaya</td>\n",
433
+ " <td>Jakarta</td>\n",
434
+ " <td>20000</td>\n",
435
+ " <td>4.6</td>\n",
436
+ " </tr>\n",
437
+ " <tr>\n",
438
+ " <th>1</th>\n",
439
+ " <td>2</td>\n",
440
+ " <td>Kota Tua</td>\n",
441
+ " <td>Kota tua di Jakarta, yang juga bernama Kota Tu...</td>\n",
442
+ " <td>Budaya</td>\n",
443
+ " <td>Jakarta</td>\n",
444
+ " <td>0</td>\n",
445
+ " <td>4.6</td>\n",
446
+ " </tr>\n",
447
+ " <tr>\n",
448
+ " <th>2</th>\n",
449
+ " <td>3</td>\n",
450
+ " <td>Dunia Fantasi</td>\n",
451
+ " <td>Dunia Fantasi atau disebut juga Dufan adalah t...</td>\n",
452
+ " <td>Taman Hiburan</td>\n",
453
+ " <td>Jakarta</td>\n",
454
+ " <td>270000</td>\n",
455
+ " <td>4.6</td>\n",
456
+ " </tr>\n",
457
+ " <tr>\n",
458
+ " <th>3</th>\n",
459
+ " <td>4</td>\n",
460
+ " <td>Taman Mini Indonesia Indah (TMII)</td>\n",
461
+ " <td>Taman Mini Indonesia Indah merupakan suatu kaw...</td>\n",
462
+ " <td>Taman Hiburan</td>\n",
463
+ " <td>Jakarta</td>\n",
464
+ " <td>10000</td>\n",
465
+ " <td>4.5</td>\n",
466
+ " </tr>\n",
467
+ " <tr>\n",
468
+ " <th>4</th>\n",
469
+ " <td>5</td>\n",
470
+ " <td>Atlantis Water Adventure</td>\n",
471
+ " <td>Atlantis Water Adventure atau dikenal dengan A...</td>\n",
472
+ " <td>Taman Hiburan</td>\n",
473
+ " <td>Jakarta</td>\n",
474
+ " <td>94000</td>\n",
475
+ " <td>4.5</td>\n",
476
+ " </tr>\n",
477
+ " </tbody>\n",
478
+ "</table>\n",
479
+ "</div>"
480
+ ],
481
+ "text/plain": [
482
+ " Place_Id Place_Name \\\n",
483
+ "0 1 Monumen Nasional \n",
484
+ "1 2 Kota Tua \n",
485
+ "2 3 Dunia Fantasi \n",
486
+ "3 4 Taman Mini Indonesia Indah (TMII) \n",
487
+ "4 5 Atlantis Water Adventure \n",
488
+ "\n",
489
+ " Description Category City \\\n",
490
+ "0 Monumen Nasional atau yang populer disingkat d... Budaya Jakarta \n",
491
+ "1 Kota tua di Jakarta, yang juga bernama Kota Tu... Budaya Jakarta \n",
492
+ "2 Dunia Fantasi atau disebut juga Dufan adalah t... Taman Hiburan Jakarta \n",
493
+ "3 Taman Mini Indonesia Indah merupakan suatu kaw... Taman Hiburan Jakarta \n",
494
+ "4 Atlantis Water Adventure atau dikenal dengan A... Taman Hiburan Jakarta \n",
495
+ "\n",
496
+ " Price Rating \n",
497
+ "0 20000 4.6 \n",
498
+ "1 0 4.6 \n",
499
+ "2 270000 4.6 \n",
500
+ "3 10000 4.5 \n",
501
+ "4 94000 4.5 "
502
+ ]
503
+ },
504
+ "execution_count": 11,
505
+ "metadata": {},
506
+ "output_type": "execute_result"
507
+ }
508
+ ],
509
+ "source": [
510
+ "data.head()"
511
+ ]
512
+ },
513
+ {
514
+ "cell_type": "code",
515
+ "execution_count": 2,
516
+ "metadata": {},
517
+ "outputs": [
518
+ {
519
+ "name": "stderr",
520
+ "output_type": "stream",
521
+ "text": [
522
+ "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\sentence_transformers\\cross_encoder\\CrossEncoder.py:11: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
523
+ " from tqdm.autonotebook import tqdm, trange\n"
524
+ ]
525
+ }
526
+ ],
527
+ "source": [
528
+ "import mysql.connector\n",
529
+ "from mysql.connector import Error\n",
530
+ "import ollama\n",
531
+ "from sentence_transformers import SentenceTransformer\n",
532
+ "from sklearn.metrics.pairwise import cosine_similarity\n",
533
+ "from tqdm import tqdm\n",
534
+ "import numpy as np"
535
+ ]
536
+ },
537
+ {
538
+ "cell_type": "code",
539
+ "execution_count": 13,
540
+ "metadata": {},
541
+ "outputs": [],
542
+ "source": [
543
+ "def connect_to_database():\n",
544
+ " try:\n",
545
+ " connection = mysql.connector.connect(\n",
546
+ " host=\"localhost\",\n",
547
+ " user=\"root\",\n",
548
+ " password=\"admin123\",\n",
549
+ " database=\"tourism_destination\"\n",
550
+ " )\n",
551
+ " return connection\n",
552
+ " except Error as e:\n",
553
+ " print(f\"Error: '{e}'\")\n",
554
+ " return None\n",
555
+ " \n",
556
+ " # Function to check if a column exists, and add it if necessary\n",
557
+ "def add_embedding_column_if_not_exists(cursor):\n",
558
+ " # Check if the 'Embedding' column exists\n",
559
+ " cursor.execute(\"SHOW COLUMNS FROM places LIKE 'Embedding'\")\n",
560
+ " result = cursor.fetchone()\n",
561
+ " \n",
562
+ " # If the 'Embedding' column does not exist, add it\n",
563
+ " if not result:\n",
564
+ " print(\"Adding 'Embedding' column to the table...\")\n",
565
+ " cursor.execute(\"ALTER TABLE places ADD COLUMN Embedding TEXT\")\n",
566
+ " print(\"'Embedding' column added.\")"
567
+ ]
568
+ },
569
+ {
570
+ "cell_type": "code",
571
+ "execution_count": 14,
572
+ "metadata": {},
573
+ "outputs": [
574
+ {
575
+ "name": "stdout",
576
+ "output_type": "stream",
577
+ "text": [
578
+ "Adding 'Embedding' column to the table...\n"
579
+ ]
580
+ }
581
+ ],
582
+ "source": [
583
+ "connection = connect_to_database()\n",
584
+ "cursor = connection.cursor()\n",
585
+ "add_embedding_column_if_not_exists(cursor)"
586
+ ]
587
+ },
588
+ {
589
+ "cell_type": "code",
590
+ "execution_count": 3,
591
+ "metadata": {},
592
+ "outputs": [
593
+ {
594
+ "name": "stderr",
595
+ "output_type": "stream",
596
+ "text": [
597
+ "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
598
+ " warnings.warn(\n"
599
+ ]
600
+ }
601
+ ],
602
+ "source": [
603
+ "# Koneksi ke MySQL\n",
604
+ "def connect_to_database():\n",
605
+ " try:\n",
606
+ " connection = mysql.connector.connect(\n",
607
+ " host=\"localhost\",\n",
608
+ " user=\"root\",\n",
609
+ " password=\"admin123\",\n",
610
+ " database=\"tourism_destination\"\n",
611
+ " )\n",
612
+ " return connection\n",
613
+ " except Error as e:\n",
614
+ " print(f\"Error: '{e}'\")\n",
615
+ " return None\n",
616
+ "\n",
617
+ "\n",
618
+ "\n",
619
+ "# Compute and store embeddings\n",
620
+ "def compute_and_store_embeddings():\n",
621
+ " model = SentenceTransformer('paraphrase-MiniLM-L6-v2') \n",
622
+ "\n",
623
+ " # Connect to the database\n",
624
+ " connection = connect_to_database()\n",
625
+ " if connection is None:\n",
626
+ " return\n",
627
+ " \n",
628
+ " cursor = connection.cursor(dictionary=True)\n",
629
+ " \n",
630
+ " # Select all places from the database\n",
631
+ " cursor.execute(\"SELECT Place_Id, Place_Name, Category, Description, City FROM places\")\n",
632
+ " places = cursor.fetchall()\n",
633
+ " \n",
634
+ " for place in places:\n",
635
+ " # Combine PlaceName, Category, Description, and City into one string\n",
636
+ " text = f\"{place['Place_Name']} {place['Category']} {place['Description']} {place['City']}\"\n",
637
+ " \n",
638
+ " # Generate embedding for the combined text\n",
639
+ " embedding = model.encode(text)\n",
640
+ " \n",
641
+ " # Convert embedding to a string format to store in the database\n",
642
+ " embedding_str = ','.join([str(x) for x in embedding])\n",
643
+ " \n",
644
+ " # Update the place in the database with the embedding\n",
645
+ " cursor.execute(\n",
646
+ " \"UPDATE places SET Embedding = %s WHERE Place_Id = %s\", \n",
647
+ " (embedding_str, place['Place_Id'])\n",
648
+ " )\n",
649
+ " \n",
650
+ " # Commit the changes and close the connection\n",
651
+ " connection.commit()\n",
652
+ " cursor.close()\n",
653
+ " connection.close()\n",
654
+ "\n",
655
+ "# Run the function to compute and store embeddings\n",
656
+ "compute_and_store_embeddings()"
657
+ ]
658
+ },
659
+ {
660
+ "cell_type": "code",
661
+ "execution_count": 5,
662
+ "metadata": {},
663
+ "outputs": [
664
+ {
665
+ "name": "stderr",
666
+ "output_type": "stream",
667
+ "text": [
668
+ "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
669
+ " warnings.warn(\n"
670
+ ]
671
+ },
672
+ {
673
+ "name": "stdout",
674
+ "output_type": "stream",
675
+ "text": [
676
+ "\n",
677
+ "Top 5 Ranked Destinations:\n",
678
+ "\n",
679
+ "Pulau Semak Daun (Rating: 4.0, Similarity Score: 0.6861)\n",
680
+ "Wisata Batu Kuda (Rating: 4.4, Similarity Score: 0.6839)\n",
681
+ "Gedung Agung Yogyakarta (Rating: 4.6, Similarity Score: 0.6727)\n",
682
+ "Taman Sungai Mudal (Rating: 4.6, Similarity Score: 0.6595)\n",
683
+ "Grand Maerakaca (Rating: 4.4, Similarity Score: 0.6581)\n"
684
+ ]
685
+ }
686
+ ],
687
+ "source": [
688
+ "# Koneksi ke MySQL\n",
689
+ "def query_database():\n",
690
+ " try:\n",
691
+ " connection = mysql.connector.connect(\n",
692
+ " host=\"localhost\",\n",
693
+ " user=\"root\",\n",
694
+ " password=\"admin123\",\n",
695
+ " database=\"tourism_destination\"\n",
696
+ " )\n",
697
+ "\n",
698
+ " if connection.is_connected():\n",
699
+ " cursor = connection.cursor(dictionary=True)\n",
700
+ " sql = \"SELECT * FROM places\"\n",
701
+ " cursor.execute(sql)\n",
702
+ " results = cursor.fetchall()\n",
703
+ " return results\n",
704
+ "\n",
705
+ " except Error as e:\n",
706
+ " print(f\"Error: '{e}'\")\n",
707
+ " \n",
708
+ " finally:\n",
709
+ " if connection.is_connected():\n",
710
+ " cursor.close()\n",
711
+ " connection.close()\n",
712
+ "\n",
713
+ "# Get embedding from the database and calculate cosine similarity\n",
714
+ "def get_similar_places(user_embedding, db_results):\n",
715
+ " similarities = []\n",
716
+ " \n",
717
+ " for place in db_results:\n",
718
+ " embedding_str = place['Embedding'] # Assuming embeddings are stored as comma-separated strings in the database\n",
719
+ " embedding = np.array([float(x) for x in embedding_str.split(',')]) # Convert the string back to a numpy array\n",
720
+ " \n",
721
+ " # Compute cosine similarity\n",
722
+ " similarity = cosine_similarity([user_embedding], [embedding])[0][0]\n",
723
+ " similarities.append((place, similarity))\n",
724
+ " \n",
725
+ " # Sort results based on similarity and then by rating\n",
726
+ " ranked_results = sorted(similarities, key=lambda x: (x[1], x[0]['Rating']), reverse=True)\n",
727
+ " \n",
728
+ " # Return top 5 places\n",
729
+ " return ranked_results[:5]\n",
730
+ "\n",
731
+ "# Ollama - Generate possible places (Retrieval Augmented Generation)\n",
732
+ "def generate_rag_result(user_query):\n",
733
+ " prompt = f\"User Query: {user_query}\\n\\nPlease list 10 potential destinations based on user query:\"\n",
734
+ " \n",
735
+ " print(\"\\nGenerating results using Ollama (RAG)...\\n\")\n",
736
+ " with tqdm(total=10, desc=\"Processing RAG\") as pbar:\n",
737
+ " response = ollama.generate(model=\"llama3.1\", prompt=prompt)\n",
738
+ " pbar.update(5)\n",
739
+ " \n",
740
+ " # Process the response (assuming response structure is consistent)\n",
741
+ " print(\"Full response:\", response)\n",
742
+ " return response # For now, we don't need to extract specific places, as similarity search will handle that\n",
743
+ "\n",
744
+ "# Main function to find the top 5 destinations\n",
745
+ "def get_top_5_destinations(user_query):\n",
746
+ " # Step 1: Generate embedding for user query\n",
747
+ " model = SentenceTransformer('paraphrase-MiniLM-L6-v2')\n",
748
+ " user_embedding = model.encode(user_query)\n",
749
+ " \n",
750
+ " # Step 2: Fetch all places from the database\n",
751
+ " db_results = query_database()\n",
752
+ " if not db_results or len(db_results) == 0:\n",
753
+ " print(\"No data returned from database.\")\n",
754
+ " return\n",
755
+ " \n",
756
+ " # Step 3: Find the most similar places\n",
757
+ " top_5_places = get_similar_places(user_embedding, db_results)\n",
758
+ " \n",
759
+ " # Step 4: Display top 5 destinations\n",
760
+ " print(\"\\nTop 5 Ranked Destinations:\\n\")\n",
761
+ " for place, score in top_5_places:\n",
762
+ " print(f\"{place['Place_Name']} (Rating: {place['Rating']}, Similarity Score: {score:.4f})\")\n",
763
+ "\n",
764
+ "# Example user query\n",
765
+ "user_query = \"Saya ingin ke Jogjakarta dan saya suka dengan pemandangan alam. kemana saya harus pergi?\"\n",
766
+ "get_top_5_destinations(user_query)"
767
+ ]
768
+ },
769
+ {
770
+ "cell_type": "code",
771
+ "execution_count": 3,
772
+ "metadata": {},
773
+ "outputs": [],
774
+ "source": [
775
+ "# prompt= \"do u know about LLM?\"\n",
776
+ "# response = ollama.generate(model=\"llama3.1\", prompt=prompt)"
777
+ ]
778
+ },
779
+ {
780
+ "cell_type": "code",
781
+ "execution_count": 4,
782
+ "metadata": {},
783
+ "outputs": [],
784
+ "source": [
785
+ "# response['response'].strip().split('\\n')"
786
+ ]
787
+ },
788
+ {
789
+ "cell_type": "code",
790
+ "execution_count": 8,
791
+ "metadata": {},
792
+ "outputs": [
793
+ {
794
+ "name": "stderr",
795
+ "output_type": "stream",
796
+ "text": [
797
+ "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
798
+ " warnings.warn(\n"
799
+ ]
800
+ },
801
+ {
802
+ "name": "stdout",
803
+ "output_type": "stream",
804
+ "text": [
805
+ "\n",
806
+ "Top 5 Ranked Destinations:\n",
807
+ "\n",
808
+ "Pulau Semak Daun (Rating: 4.0, Similarity Score: 0.6407)\n",
809
+ "Jembatan Merah (Rating: 4.5, Similarity Score: 0.6401)\n",
810
+ "Pasar Beringharjo (Rating: 4.5, Similarity Score: 0.6331)\n",
811
+ "Gereja Perawan Maria Tak Berdosa Surabaya (Rating: 4.8, Similarity Score: 0.6286)\n",
812
+ "Perpustakaan Nasional (Rating: 4.7, Similarity Score: 0.6256)\n"
813
+ ]
814
+ }
815
+ ],
816
+ "source": [
817
+ "# Example user query\n",
818
+ "user_query = \"Saya ingin ke Surabaya dan ingin berbelanja. kemana saya harus pergi?\"\n",
819
+ "get_top_5_destinations(user_query)"
820
+ ]
821
+ }
822
+ ],
823
+ "metadata": {
824
+ "kernelspec": {
825
+ "display_name": "llama_venv",
826
+ "language": "python",
827
+ "name": "python3"
828
+ },
829
+ "language_info": {
830
+ "codemirror_mode": {
831
+ "name": "ipython",
832
+ "version": 3
833
+ },
834
+ "file_extension": ".py",
835
+ "mimetype": "text/x-python",
836
+ "name": "python",
837
+ "nbconvert_exporter": "python",
838
+ "pygments_lexer": "ipython3",
839
+ "version": "3.12.4"
840
+ }
841
+ },
842
+ "nbformat": 4,
843
+ "nbformat_minor": 2
844
+ }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==2.6.4
2
+ sentence-transformers==2.2.2
3
+ scikit-learn==1.2.2
4
+ groq
5
+ numpy==1.23.5