Commit b6115aed authored by 李康

add some data files

parent f1e29553
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
import sys, os, time, re
# Directory prefix prepended to "all_products.csv" when findBrand opens the product list.
list_dir = "./"
# Directory prefix prepended to "brands.csv" when findBrand opens the brand list.
brand_dir = "brands/"
def getKey(name, name_en):
    """Return ``name``, or ``name_en`` when ``name`` is the empty string."""
    return name_en if name == "" else name
def _squeeze(text):
    """Return ``text`` with every run of whitespace removed."""
    return re.sub(r"\s+", "", text)


def _clean_brand_fields(fields):
    """Return the cleaned 4-field brand record, or ``None`` if it is malformed.

    The third field is cut at its first space; every other field is copied
    unchanged, so the last one still carries the input line's newline.  A
    malformed record is printed before ``None`` is returned.
    """
    if len(fields) < 3:
        # Too short to hold the third field: report it like any other
        # malformed record instead of raising IndexError.
        print(fields)
        return None
    cleaned = fields[:2] + [fields[2].split(" ")[0]] + fields[3:]
    if len(cleaned) != 4:
        print(cleaned)
        return None
    return cleaned


def _clean_product_fields(fields):
    """Return the cleaned 9-field product record, or ``None`` if it is malformed.

    Output layout: the first two input fields unchanged, the halved score,
    the digits of the comment-count field, the two halves of the "·"-separated
    fifth field (whitespace removed, leading "¥" and any "/..." suffix dropped
    from the second half), then every remaining input field unchanged.  The
    offending value is printed before ``None`` is returned.
    """
    if len(fields) < 5:
        # Fields 2-4 are indexed below; a short record is malformed input.
        print(fields)
        return None

    cleaned = fields[:2]

    # An unparsable score is recorded as 0 rather than aborting the run.
    try:
        score = float(fields[2]) / 2
    except ValueError:
        score = 0
    cleaned.append(str(score))

    # Keep only the decimal digits that appear before the first "人".
    before_marker = fields[3].partition("人")[0]
    cleaned.append("".join(ch for ch in before_marker if ch.isdecimal()))

    if fields[4] != "":
        parts = fields[4].split("·")
        cleaned.append(_squeeze(parts[0]))
        if len(parts) == 2:
            price = _squeeze(parts[1]).split("/")[0]
            # startswith() also covers an empty price, which would make
            # price[0] raise IndexError.
            if not price.startswith("¥"):
                print(price)
                return None
            cleaned.append(price[1:])
        else:
            cleaned.append("")
    else:
        cleaned += ["", ""]

    cleaned += fields[5:]
    if len(cleaned) != 9:
        print(cleaned)
        return None
    return cleaned


def findBrand(argv):
    """Clean the brand and product lists into two new "; "-separated files.

    Reads ``brand_dir + "brands.csv"`` and ``list_dir + "all_products.csv"``
    and writes ``brands_clean.csv`` and ``products_clean.csv`` to the current
    working directory.  Brands are de-duplicated on ``getKey`` of their first
    two fields (first occurrence wins).  Products whose last field (stripped)
    is not a known brand key are reported and skipped.  Blank lines are
    ignored.  The first malformed record is printed and stops the run; output
    written up to that point is kept.

    Args:
        argv: Command-line arguments; accepted for the script entry point but
            not used.

    Returns:
        None.
    """
    # NOTE(review): the files are opened with the platform default encoding,
    # as before; the parsing relies on the non-ASCII markers "人", "·" and
    # "¥", so confirm the data files match that encoding.
    known_brands = set()  # set: O(1) membership tests for every product line

    # Inputs are opened before outputs so a missing input does not truncate
    # existing output files.  The with-statement closes all four files on
    # every exit path, including the early returns below.
    with open(list_dir + "all_products.csv", "r") as product_file, \
            open(brand_dir + "brands.csv", "r") as brand_file, \
            open("brands_clean.csv", "w") as brand_out, \
            open("products_clean.csv", "w") as product_out:

        for line in brand_file:
            if not line.strip():
                continue
            fields = line.split("; ")
            if len(fields) < 2:
                print(fields)
                return
            brand = getKey(fields[0], fields[1])
            if brand in known_brands:
                continue
            cleaned = _clean_brand_fields(fields)
            if cleaned is None:
                return
            # The last field keeps its newline, so no separator is added.
            brand_out.write("; ".join(cleaned))
            known_brands.add(brand)

        for line in product_file:
            if not line.strip():
                continue
            fields = line.split("; ")
            if len(fields) < 2:
                print(fields)
                return
            key = getKey(fields[0], fields[1])
            # NOTE(review): getKey returns one of its two string arguments,
            # so this guard looks unreachable for split() output; an empty
            # key ("" for both names) is NOT caught here -- confirm intent.
            if key is None:
                print("No product key !!!!!!!!")
                return
            brand = fields[-1].strip()
            if brand not in known_brands:
                print("There is not brand of %s in product %s" % (brand, key))
                continue
            cleaned = _clean_product_fields(fields)
            if cleaned is None:
                return
            product_out.write("; ".join(cleaned))
# Script entry point: run the cleaning pass with the raw command-line arguments.
if __name__ == "__main__":
    findBrand(sys.argv)
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment