代码:
# coding: utf-8
import traceback
import faulthandler
faulthandler.enable()
from tqdm import *
import pandas as pd
from bert4vec import Bert4Vec
# Sentence encoder used by the script below. Only one model is needed: the
# simbert-base instance used to be rebound immediately by the roformer-sim-base
# one, so loading it only cost time and memory. It is kept here, disabled, as
# the alternative configuration; swap the two lines to switch models.
# model = Bert4Vec(mode='simbert-base', model_name_or_path="/Users/ruiliu/Downloads/simbert-base-chinese")
model = Bert4Vec(mode='roformer-sim-base', model_name_or_path="/Users/ruiliu/Downloads/roformer-sim-base-chinese")
def load_excel(excel_path, sheet_name=0):
    """Load a CSV file and return its rows as a list of dicts.

    Despite the name, the file is parsed with ``pandas.read_csv``.

    Args:
        excel_path: Path of the CSV file to read.
        sheet_name: Accepted for backward compatibility; the CSV reader does
            not use it.

    Returns:
        A list holding one ``{column_name: value}`` dict per data row, in
        file order. A file that contains only a header row yields ``[]``.
    """
    # keep_default_na=False stops pandas from turning empty cells (and strings
    # such as "NA") into NaN, so they come back as the text that was in the file.
    data = pd.read_csv(excel_path, keep_default_na=False)
    # One bulk conversion instead of a per-row ``.loc`` lookup; it also keeps
    # each column's own dtype rather than upcasting every row to a common one.
    return data.to_dict(orient="records")
# Script entry point: encode every question in the CSV with the module-level
# ``model``. Any failure is reported as a traceback on stdout instead of
# aborting with an unhandled exception.
if __name__ == "__main__":
    try:
        data_list = load_excel("data/交互平台问答数据-ALL-CODE.csv")
        # The context manager closes the output file even when encoding raises;
        # opening with "w" truncates any previous output.
        with open("data/simbert_no_normal_vector.txt", "w", encoding="utf-8") as o_file:
            for data in data_list:
                # Each row must provide the "标准问题*" column; it is encoded
                # as a single-sentence batch.
                sentences = [data["标准问题*"]]
                vectors = model.encode(sentences, batch_size=64, convert_to_numpy=True, normalize_to_unit=False)
                # Writing the vectors is currently disabled, so the output
                # file is left empty. Re-enable the lines below to dump them.
                # print(vectors)
                # vector = [str(float(i)) for i in vectors[0]]
                # print(vector)
                # o_file.writelines(sentences[0] + "--" + "--".join(vector) + "\n")
    except Exception:
        # Top-level boundary: print the full traceback to stdout.
        print(traceback.format_exc())