from random import uniform, sample from numpy import * from copy import deepcopy
class TransE:
    """Training driver for translation-based (TransE-style) embeddings.

    The instance starts out holding plain collections of entity names,
    relation names and training triplets. ``initialize`` replaces the two name
    collections with dictionaries mapping each name to a normalised vector, and
    ``transE`` then runs the sampling / corruption / update loop.

    NOTE(review): ``update``, ``writeRelationVector`` and ``writeEntilyVector``
    are called by ``transE`` but are not defined in this class body; they must
    be provided elsewhere (subclass, mixin or later assignment).
    """

    # Default dump locations used by ``transE``. These are the exact paths that
    # used to be hard-coded there, now spelled as raw strings so that ``\p``
    # and ``\k`` are no longer invalid escape sequences.
    DEFAULT_RELATION_VECTOR_PATH = r"E:\pythoncode\knownlageGraph\transE-master\relationVector.txt"
    DEFAULT_ENTITY_VECTOR_PATH = r"E:\pythoncode\knownlageGraph\transE-master\entityVector.txt"

    def __init__(self, entityList, relationList, tripleList, margin=1, learingRate=0.00001, dim=10, L1=True):
        """Store the training data and hyper-parameters.

        :param entityList: iterable of entity names; replaced by a
            ``{entity: vector}`` dict once ``initialize`` has run.
        :param relationList: iterable of relation names; replaced by a
            ``{relation: vector}`` dict once ``initialize`` has run.
        :param tripleList: sequence of training triplets to sample from.
        :param margin: stored as ``self.margin``; not read by the methods
            defined here (presumably consumed by ``update`` - verify).
        :param learingRate: stored as ``self.learingRate`` (spelling kept for
            compatibility); not read by the methods defined here.
        :param dim: number of components of every embedding vector.
        :param L1: stored as ``self.L1``; presumably selects L1 vs. L2
            distance in ``update`` - verify against that method.
        """
        self.margin = margin
        self.learingRate = learingRate
        self.dim = dim  # vector dimensionality
        self.entityList = entityList
        self.relationList = relationList
        self.tripleList = tripleList
        self.loss = 0
        self.L1 = L1

    def _randomUnitVectors(self, names):
        """Map every name to a freshly drawn, normalised vector of ``self.dim`` components."""
        # ``init`` is not defined in this class; it is expected to be a
        # module-level helper that draws one initial component value.
        return {
            name: norm([init(self.dim) for _ in range(self.dim)])
            for name in names
        }

    def initialize(self):
        """Create the initial embedding vectors.

        After this call ``self.entityList`` and ``self.relationList`` are
        dictionaries keyed by name whose values are the normalised vectors
        returned by ``norm``.
        """
        entityVectorList = self._randomUnitVectors(self.entityList)
        print("entityVector初始化完成,数量是%d"%len(entityVectorList))
        relationVectorList = self._randomUnitVectors(self.relationList)
        print("relationVectorList初始化完成,数量是%d"%len(relationVectorList))
        self.entityList = entityVectorList
        self.relationList = relationVectorList

    def transE(self, cI=20, batchSize=3, relationVectorPath=None, entityVectorPath=None):
        """Run the training loop.

        Every cycle draws ``batchSize`` triplets, pairs each with a corrupted
        counterpart, drops duplicate pairs and hands the batch to
        ``self.update``. On every 100th cycle (including cycle 0) the cycle
        index and ``self.loss`` are printed, both vector tables are written
        out and ``self.loss`` is reset to 0.

        :param cI: number of training cycles.
        :param batchSize: number of triplets sampled per cycle (default 3,
            the previously hard-coded value).
        :param relationVectorPath: target passed to ``writeRelationVector``;
            ``None`` selects ``DEFAULT_RELATION_VECTOR_PATH``.
        :param entityVectorPath: target passed to ``writeEntilyVector``;
            ``None`` selects ``DEFAULT_ENTITY_VECTOR_PATH``.
        """
        if relationVectorPath is None:
            relationVectorPath = self.DEFAULT_RELATION_VECTOR_PATH
        if entityVectorPath is None:
            entityVectorPath = self.DEFAULT_ENTITY_VECTOR_PATH

        print("训练开始")
        for cycleIndex in range(cI):
            Sbatch = self.getSample(batchSize)
            # List of (original triplet, corrupted triplet) pairs.
            Tbatch = []
            for sbatch in Sbatch:
                tripletWithCorruptedTriplet = (sbatch, self.getCorruptedTriplet(sbatch))
                if tripletWithCorruptedTriplet not in Tbatch:
                    Tbatch.append(tripletWithCorruptedTriplet)
            self.update(Tbatch)
            if cycleIndex % 100 == 0:
                print("第%d次循环"%cycleIndex)
                print(self.loss)
                self.writeRelationVector(relationVectorPath)
                self.writeEntilyVector(entityVectorPath)
                # Start a fresh total for the next reporting window
                # (presumably ``update`` accumulates into ``self.loss``).
                self.loss = 0

    def getSample(self, size):
        """Return ``size`` distinct positions' worth of triplets drawn at random from ``self.tripleList``.

        :raises ValueError: if ``size`` exceeds ``len(self.tripleList)``
            (raised by ``random.sample``).
        """
        return sample(self.tripleList, size)

    def getCorruptedTriplet(self, triplet):
        """Return a copy of ``triplet`` with its first or second item replaced.

        With equal probability either item 0 or item 1 (never both) is swapped
        for a randomly chosen entity that differs from the item it replaces;
        item 2 is copied unchanged.

        NOTE(review): the distance helpers in this file take ``(h, t, r)``, so
        triplets are presumably stored as (head, tail, relation), which makes
        item 1 the tail - confirm against ``update``.

        :param triplet: the triplet to corrupt (at least three items).
        :return: the corrupted 3-tuple.
        :raises ValueError: if no entity other than the replaced one exists.
        """
        # Negative draw -> corrupt item 0, otherwise corrupt item 1.
        position = 0 if uniform(-1, 1) < 0 else 1
        original = triplet[position]

        # Build a real list: ``random.sample`` rejects dict key views on
        # Python >= 3.11. Filtering up front also removes the retry loop,
        # which could spin forever when no alternative entity exists.
        candidates = [entity for entity in self.entityList if entity != original]
        if not candidates:
            raise ValueError("no alternative entity available to corrupt %r" % (triplet,))
        entityTemp = sample(candidates, 1)[0]

        if position == 0:
            return (entityTemp, triplet[1], triplet[2])
        return (triplet[0], entityTemp, triplet[2])
def norm(list):
    """Scale a vector to unit Euclidean length.

    The argument is rescaled in place (as before) and the result is also
    returned as a new numpy array. A vector whose norm is 0 - all zeros, or
    empty - is returned unchanged instead of being divided by zero, which
    used to yield NaN components.

    :param list: mutable sequence (or numpy array) of numbers; the name
        shadows the builtin but is kept for caller compatibility.
    :return: numpy array holding the normalised components.
    """
    var = linalg.norm(list)
    if var == 0:
        # Nothing to scale; avoid 0/0 -> NaN.
        return array(list)
    # Slice assignment keeps the original in-place update of the argument.
    list[:] = array(list) / var
    return array(list)
def distanceL1(h, t, r):
    """Return the L1 distance of the translation residual ``h + r - t``.

    The operands are combined with ``+`` and ``-`` and the absolute values of
    the result are summed, so they are assumed to be numpy arrays of matching
    shape (TODO confirm against callers).

    :param h: first vector (head).
    :param t: vector that is subtracted (tail).
    :param r: vector that is added to ``h`` (relation).
    :return: sum of the absolute components of ``h + r - t``.
    """
    residual = h + r - t
    return fabs(residual).sum()
def distanceL2(h, t, r):
    """Return the squared L2 distance of the translation residual ``h + r - t``.

    No square root is taken: the value is the sum of the squared components.
    The operands are assumed to be numpy arrays of matching shape
    (TODO confirm against callers).

    :param h: first vector (head).
    :param t: vector that is subtracted (tail).
    :param r: vector that is added to ``h`` (relation).
    :return: sum of the squared components of ``h + r - t``.
    """
    residual = h + r - t
    squared = residual * residual
    return squared.sum()
def openDetailsAndId(dir, sp=" "):
    """Read the first column of a UTF-8 text file.

    Each non-blank line is stripped and split on ``sp``; the first field is
    collected. Blank or whitespace-only lines are skipped - previously they
    were recorded as an entry named ``''`` and counted.

    :param dir: path of the file to read (name shadows the builtin but is
        kept for caller compatibility).
    :param sp: field separator.
    :return: ``(count, names)`` where ``names`` is the list of first fields
        in file order and ``count == len(names)``.
    """
    names = []
    with open(dir, "r", encoding="utf-8") as handle:
        # Iterate lazily instead of loading every line with readlines().
        for raw in handle:
            stripped = raw.strip()
            if not stripped:
                continue
            names.append(stripped.split(sp)[0])
    return len(names), names
def openTrain(dir, sp=" "):
    """Load training triplets from a UTF-8 text file.

    Every line is stripped and split on ``sp``. Lines that yield fewer than
    three fields are ignored; all others are stored as a tuple of their
    fields.

    :param dir: path of the file to read.
    :param sp: field separator.
    :return: ``(count, triples)`` where ``triples`` is the list of tuples in
        file order and ``count`` is its length.
    """
    triples = []
    with open(dir, "r", encoding="utf-8") as handle:
        for raw in handle.readlines():
            fields = raw.strip().split(sp)
            if len(fields) >= 3:
                triples.append(tuple(fields))
    return len(triples), triples