|
def filter_tags(row, filt, operator) -> bool:
    """Decide whether a row's tags satisfy a tag filter.

    Only string entries of ``row["tags"]`` take part in the comparison;
    any other entries are ignored.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a ``"tags"``
            entry holding an iterable of tags. A missing or scalar value
            (``None``, NaN, a bare string) is treated as "no tags".
        filt: Iterable of tags to test against.
        operator: ``"All"`` requires every tag in ``filt`` to be present,
            ``"Any"`` requires at least one, and ``"None"`` requires that
            none of them is present.

    Returns:
        ``True`` when the row satisfies the filter. Any other ``operator``
        value yields ``False``.
    """
    raw_tags = row["tags"]
    # A scalar cell cannot be iterated as a tag collection, and iterating a
    # bare string would yield single characters, so both count as "no tags".
    if isinstance(raw_tags, (str, bytes)) or not hasattr(raw_tags, "__iter__"):
        raw_tags = ()

    # Build a local set rather than rewriting row["tags"] in place: this is
    # a predicate and must not modify the caller's data as a side effect.
    row_tags = {tag for tag in raw_tags if isinstance(tag, str)}
    wanted = set(filt)

    if operator == "All":
        return wanted <= row_tags
    if operator == "Any":
        return not wanted.isdisjoint(row_tags)
    if operator == "None":
        return wanted.isdisjoint(row_tags)
    return False
|
|
|
|
|
def filter_pipeline_data(data, modality, pipeline, library, framework, tags, operator):
    """Filter a model table by the selected criteria and list remaining tags.

    Rows whose ``"pipeline"`` value is missing are always dropped. Each
    further filter is applied only when its selector is not the "all"
    sentinel.

    Args:
        data: DataFrame with ``"pipeline"``, ``"modality"``, ``"library"``
            and ``"tags"`` columns, plus one column per framework name.
        modality: ``"All"`` to skip, otherwise compared lower-cased against
            the ``"modality"`` column.
        pipeline: ``"all"`` to skip, otherwise matched exactly against the
            ``"pipeline"`` column.
        library: ``"all"`` to skip, ``"not transformers"`` to exclude rows
            whose library is ``"transformers"``, otherwise an exact match.
        framework: ``"all"`` to skip, otherwise the name of a column that
            must equal ``1``.
        tags: Tags to filter on; an empty or ``None`` value disables the
            tag filter.
        operator: Passed through to ``filter_tags`` (``"All"``, ``"Any"``
            or ``"None"``).

    Returns:
        A tuple ``(filtered, tag_names)``: the filtered DataFrame, and an
        array of the distinct string tags left in it, in ``value_counts``
        order (most frequent first).
    """
    data = data[data["pipeline"].notna()]

    # NOTE(review): the modality sentinel is "All" while the other selectors
    # use lower-case "all" -- confirm this matches what the caller sends.
    if modality != "All":
        data = data[data["modality"] == modality.lower()]

    if pipeline != "all":
        data = data[data["pipeline"] == pipeline]

    if library == "not transformers":
        data = data[data["library"] != "transformers"]
    elif library != "all":
        data = data[data["library"] == library]

    if framework != "all":
        data = data[data[framework] == 1]

    # Skip the row-wise apply when there is no tag filter or nothing is left
    # to filter.
    if tags and not data.empty:
        keep = data.apply(filter_tags, axis=1, filt=tags, operator=operator)
        data = data[keep]

    # Tags still present after filtering, ordered by how often they occur.
    tag_counts = data["tags"].explode().dropna().value_counts()
    tag_names = tag_counts.index.to_series()
    is_string = tag_names.map(lambda tag: isinstance(tag, str)).astype(bool)

    return data, tag_names[is_string].unique()