Commit e4407c5f authored by 高雅喆

read tidb

parent dec6e3c3
......@@ -4,7 +4,7 @@ import numpy as np
import pandas as pd
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine, correlation
from tool import get_all_tag_parent_tag, get_all_tags_name, get_all_word_synonym_words
from tool import get_all_tag_parent_tag, get_all_tags_name, get_all_word_synonym_words, get_data_by_mysql
# Recommender Engine
......@@ -14,12 +14,22 @@ def get_similary_tags(tag_id, tags, ratings_matrix):
tags['similarity'] = ratings_matrix.iloc[tag_id_index]
return tags.sort_values(["similarity"], ascending=False)[:-1].tag_id.tolist() # rm self
def get_user_log():
    """Load the user/tag rating log from the ``item_cf_log`` table.

    Runs ``select userId, tagId, rating from item_cf_log`` through
    ``get_data_by_mysql`` and wraps the returned rows in a DataFrame.

    Connection settings default to the values that used to be hard-coded
    in the call, so existing behaviour is unchanged.  Each one can be
    overridden without editing the source via an environment variable:
    ``TIDB_HOST``, ``TIDB_PORT``, ``TIDB_USER``, ``TIDB_PASSWORD`` and
    ``TIDB_DATABASE``.

    Returns:
        pandas.DataFrame: built directly from whatever ``get_data_by_mysql``
        returns (presumably one row per log entry with ``userId``,
        ``tagId`` and ``rating`` -- confirm against the helper).

    Raises:
        ValueError: if ``TIDB_PORT`` is set to a non-integer value.
    """
    # Function-scope import keeps this block self-contained; the module's
    # top-level imports are left untouched.
    import os

    sql = "select userId, tagId, rating from item_cf_log"

    host = os.environ.get("TIDB_HOST", "172.16.40.158")
    # The helper was originally given an int port, so convert the env value.
    port = int(os.environ.get("TIDB_PORT", "4000"))
    user = os.environ.get("TIDB_USER", "root")
    # NOTE(review): this fallback password is committed in plain text and is
    # kept only for backward compatibility.  It should be rotated and the
    # default removed once TIDB_PASSWORD is set in every deployment.
    password = os.environ.get("TIDB_PASSWORD", "3SYz54LS9#^9sBvC")
    database = os.environ.get("TIDB_DATABASE", "jerry_test")

    mysql_results = get_data_by_mysql(host, port, user, password, database, sql)
    return pd.DataFrame(mysql_results)
if __name__ == '__main__':
# User Log
# index: sample id
# columns: userId,tagId,rating
ratings = pd.read_csv("/home/gmuser/gyz/log/cf/user_item_log.csv", sep="\t")
ratings.drop("timestamp", inplace=True, axis=1)
# ratings = pd.read_csv("/home/gmuser/gyz/log/cf/user_item_log.csv", sep="\t")
# ratings.drop("timestamp", inplace=True, axis=1)
# sorted_tag_ids = sorted(ratings.tagId.unique())
# cf_tags = pd.DataFrame({'tag_id': sorted_tag_ids})
ratings = get_user_log()
sorted_tag_ids = sorted(ratings.tagId.unique())
cf_tags = pd.DataFrame({'tag_id': sorted_tag_ids})
# ratings.head()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment