beweinreich's picture
improvements to cleaning traits
cc41c90
raw
history blame
500 Bytes
import re
def clean_trait(self, trait, max_words=4):
    """Normalize a raw trait string, or reject it if it reads like a sentence.

    The trait is flattened onto one line, stripped of surrounding
    whitespace, leading dashes and trailing periods, and lower-cased.

    Args:
        self: Unused; kept so existing call sites keep working.
        trait: The raw trait text. ``None`` is accepted and yields ``None``.
        max_words: Maximum number of whitespace-separated words a trait may
            contain before it is treated as a sentence and rejected.

    Returns:
        The cleaned, lower-cased trait (possibly an empty string when the
        input contained nothing but whitespace, dashes or periods), or
        ``None`` when the input is ``None`` or has more than ``max_words``
        words.
    """
    if trait is None:
        return None

    # Collapse each line break (and the whitespace around it) into a single
    # space. Deleting the break outright would fuse neighbouring words
    # ("kind\nhearted" -> "kindhearted") and undercount words below.
    cleaned_trait = re.sub(r'\s*\n\s*', ' ', trait).strip()

    # Drop leading dashes such as a list-item marker ("- friendly").
    cleaned_trait = re.sub(r'^-+', '', cleaned_trait).strip()

    # Drop trailing periods, then any whitespace that preceded them
    # ("calm ." -> "calm").
    cleaned_trait = cleaned_trait.rstrip('.').rstrip()
    cleaned_trait = cleaned_trait.lower()

    # More words than the limit means it is likely a sentence, not a trait.
    if len(cleaned_trait.split()) > max_words:
        return None
    return cleaned_trait