Linker1907 committed on
Commit
724b1ea
·
1 Parent(s): 5a06175
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os

import streamlit as st
import streamlit.components.v1 as components
4
+
5
# Directory holding one "<dataset>_bad_examples.jsonl" file per dataset.
BAD_EXAMPLES_PATH = "bad_examples"
# Directory holding the "<dataset>_examples_with_stats.json" input files.
DATA_PATH = "data"
7
+
8
def load_jsonl(file_path):
    """Load a JSON Lines file into a list of parsed records.

    Args:
        file_path: Path to a file holding one JSON document per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Skip blank lines (e.g. a trailing empty line); json.loads would
        # raise on them.
        return [json.loads(line) for line in f if line.strip()]
15
+
16
+
17
# Position of the example currently shown. Initialised only when absent, so
# an existing value in the session state is left untouched.
if 'idx' not in st.session_state:
    st.session_state.idx = 0
19
+
20
def get_next_item():
    """Advance the example index stored in the session state by one."""
    st.session_state.idx += 1
22
+
23
def save_and_get_next_item(sample):
    """Record ``sample`` as a bad example, then move on to the next item.

    The sample is appended as a single JSON line to the bad-examples file of
    the dataset currently held in the module-level ``dataset`` variable.

    Args:
        sample: A JSON-serialisable record to append.
    """
    # Append mode keeps previously saved examples; explicit UTF-8 keeps the
    # file encoding independent of the platform default.
    with open(f'{BAD_EXAMPLES_PATH}/{dataset}_bad_examples.jsonl', 'a', encoding='utf-8') as f:
        f.write(json.dumps(sample) + '\n')

    get_next_item()
29
+
30
+
31
# Datasets that can be picked in the sidebar; each name maps to
# "<DATA_PATH>/<name>_examples_with_stats.json".
datasets = ['gutenberg_raw', "stackexchange2", "bigcode_python_code", "bigcode_python_github_issues", "bigcode_python_jupyter_scripts_dedup_filtered", "books3", "c4", "s2orc_raw"]
dataset = st.sidebar.selectbox("Dataset", datasets)
data = load_jsonl(f'{DATA_PATH}/{dataset}_examples_with_stats.json')

# Single place for the path of the selected dataset's bad-examples file.
bad_examples_file = f'{BAD_EXAMPLES_PATH}/{dataset}_bad_examples.jsonl'

# Create the bad-examples directory and file if they do not exist yet, so
# the read below cannot fail on a missing path. Append mode leaves any
# existing content untouched.
os.makedirs(BAD_EXAMPLES_PATH, exist_ok=True)
with open(bad_examples_file, 'a'):
    pass

# Deleting 'idx' makes the initialisation at the top of the script reset it
# to 0 on the next run.
st.sidebar.button("Reset Index", on_click=lambda: st.session_state.__delitem__('idx'))

with open(bad_examples_file, "r+") as f:
    st.sidebar.download_button('Download bad example JSON file', f)

# Opening in 'w' mode truncates the file; it is closed again immediately.
st.sidebar.button("Clear bad examples file", on_click=lambda: open(bad_examples_file, 'w').close())

if st.session_state.idx >= len(data):
    # The index runs past the end once every example has been rated, or
    # after switching to a dataset with fewer examples; indexing `data`
    # here would raise IndexError.
    st.info(f"No more examples to review in '{dataset}'. Use 'Reset Index' to start over.")
else:
    with st.form(key='checkbox', clear_on_submit=True):
        sample = data[st.session_state.idx]
        text = sample["text"]
        st.text_area(f"text id: {st.session_state.idx}", text, height=500)

        # GOOD only advances; BAD also appends the sample to the
        # bad-examples file before advancing.
        good = st.form_submit_button('GOOD', on_click=get_next_item)
        bad = st.form_submit_button('BAD', on_click=save_and_get_next_item, args=(sample,))
bad_examples/bigcode_python_code_bad_examples.jsonl ADDED
File without changes
bad_examples/bigcode_python_github_issues_bad_examples.jsonl ADDED
File without changes
bad_examples/bigcode_python_jupyter_scripts_dedup_filtered_bad_examples.jsonl ADDED
File without changes
bad_examples/books3_bad_examples.jsonl ADDED
File without changes
bad_examples/c4_bad_examples.jsonl ADDED
File without changes
bad_examples/gutenberg_raw_bad_examples.jsonl ADDED
File without changes
bad_examples/s2orc_raw_bad_examples.jsonl ADDED
File without changes
bad_examples/stackexchange2_bad_examples.jsonl ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"question_id": 151, "text": "user68: I don't much like the hermeneutical-approaches tag. For one thing, it's too long. I would like to change it to hermeneutics. But that could be confusing since a naive user might wonder why all questions aren't tagged that way as it's the name of the site. \nAnother suggestion is approaches which has an elegance to it, but might not be immediately obvious to someone groping for the proper tag for a question that's not exegesis.\nLooking at the hermeneutical-approaches questions is there a tag (or tags) that would be a better label?\n\nuser67: Great suggestion; I don't see any questions tagged as hermeneutical-approaches that wouldn't be fine tagged hermeneutics. \nThe site's name is a bit of a problem for me as well; most of the questions on this site don't even fit the named topic. But I'd be happy with making the tag simply hermeneutics\n\nuser15: We can't make it hermeneutics because of the site-wide block on that tag. (I've already tried.) \nWe can look into getting this removed, since this site isn't exclusively about hermeneutics. However, it will take a bit of effort from the SE people. And they are a tough crowd to convince. Many (some?) of them are blatantly opposed to any tags of this sort.\n", "metadata": "[https://hermeneutics.meta.stackexchange.com/questions/151, https://hermeneutics.meta.stackexchange.com, https://hermeneutics.meta.stackexchange.com/users/68/]", "date": "2011/11/03", "original_text": "[68: <p>I don't much like the <a href=\"https://hermeneutics.stackexchange.com/questions/tagged/hermeneutical-approaches\" class=\"post-tag\" title=\"show questions tagged &#39;hermeneutical-approaches&#39;\" rel=\"tag\">hermeneutical-approaches</a> tag. For one thing, it's too long. I would like to change it to <a href=\"https://hermeneutics.stackexchange.com/questions/tagged/hermeneutics\" class=\"post-tag\" title=\"show questions tagged &#39;hermeneutics&#39;\" rel=\"tag\">hermeneutics</a>. 
But that could be confusing since a naive user might wonder why all questions aren't tagged that way as it's the name of the site. </p>\n\n<p>Another suggestion is <a href=\"https://hermeneutics.stackexchange.com/questions/tagged/approaches\" class=\"post-tag\" title=\"show questions tagged &#39;approaches&#39;\" rel=\"tag\">approaches</a> which has an elegance to it, but might not be immediately obvious to someone groping for the proper tag for a question that's not <a href=\"https://hermeneutics.stackexchange.com/questions/tagged/exegesis\" class=\"post-tag\" title=\"show questions tagged &#39;exegesis&#39;\" rel=\"tag\">exegesis</a>.</p>\n\n<p>Looking at the <a href=\"https://hermeneutics.stackexchange.com/questions/tagged/hermeneutical-approaches\" class=\"post-tag\" title=\"show questions tagged &#39;hermeneutical-approaches&#39;\" rel=\"tag\">hermeneutical-approaches</a> questions is there a tag (or tags) that would be a better label?</p>\n, 67: <p>Great suggestion; I don't see any questions tagged as <a href=\"https://hermeneutics.stackexchange.com/questions/tagged/hermeneutical-approaches\" class=\"post-tag\" title=\"show questions tagged &#39;hermeneutical-approaches&#39;\" rel=\"tag\">hermeneutical-approaches</a> that wouldn't be fine tagged <a href=\"https://hermeneutics.stackexchange.com/questions/tagged/hermeneutics\" class=\"post-tag\" title=\"show questions tagged &#39;hermeneutics&#39;\" rel=\"tag\">hermeneutics</a>. </p>\n\n<p>The site's name is a bit of a problem for me as well; most of the questions on this site don't even fit the named topic. 
But I'd be happy with making the tag simply <a href=\"https://hermeneutics.stackexchange.com/questions/tagged/hermeneutics\" class=\"post-tag\" title=\"show questions tagged &#39;hermeneutics&#39;\" rel=\"tag\">hermeneutics</a></p>\n, 15: <p>We can't make it <a href=\"https://hermeneutics.stackexchange.com/questions/tagged/hermeneutics\" class=\"post-tag\" title=\"show questions tagged &#39;hermeneutics&#39;\" rel=\"tag\">hermeneutics</a> because of the site-wide block on that tag. (I've already tried.) </p>\n\n<p>We <em>can</em> look into getting this removed, since this site isn't exclusively about hermeneutics. However, it will take a bit of effort from the SE people. And they are a tough crowd to convince. Many (some?) of them are blatantly opposed to any tags of this sort.</p>\n]", "character_repetition_ratio": {"10": 0.0893, "11": 0.082, "12": 0.0649, "13": 0.0567, "14": 0.0485, "15": 0.042, "2": 0.2842, "3": 0.1801, "4": 0.1615, "5": 0.142, "6": 0.1258, "7": 0.1161, "8": 0.1072, "9": 0.0975}, "word_repetition_ratio": {"10": 0.0, "3": 0.0096, "4": 0.0, "5": 0.0, "6": 0.0, "7": 0.0, "8": 0.0, "9": 0.0}, "special_characters_ratio": 0.2262001627, "lang_id_score": 1.0, "stopwords_ratio": 0.5619047619, "flagged_words_ratio": 0.0, "perplexity_score": 202.2}
2
+ {"question_id": 777, "text": "user6411: My meta question is prompted by this old question Will a tent provide some protection from bears? which resurfaced when a new answer came in. I know it is too late to differentiate on old questions, but, assuming new bear questions will be coming in, can we somehow avoid lumping brown and black bears together in questions and answers? \n\nuser8732: If questions are about bears in general, then the bears tag should be employed.\nThat said and seeing that there are eight species of bears in the world, it would be more than appropriate to have a separate tag for each species, providing that the question is asking something particular to the nature of that species.\nThe differences between bear species is quite amazing, but questions on the subspecies such as seen in this Wikipedia link should remain within the scope of the bear species in question!\n", "metadata": "[https://outdoors.meta.stackexchange.com/questions/777, https://outdoors.meta.stackexchange.com, https://outdoors.meta.stackexchange.com/users/6411/]", "date": "2016/05/15", "original_text": "[6411: <p>My meta question is prompted by this old question <a href=\"https://outdoors.stackexchange.com/questions/418/will-a-tent-provide-some-protection-from-bears\">Will a tent provide some protection from bears?</a> which resurfaced when a new answer came in. I know it is too late to differentiate on old questions, but, assuming new bear questions will be coming in, can we somehow avoid lumping brown and black bears together in questions and answers? 
</p>\n, 8732: <p>If questions are about bears in general, then the <strong>bears</strong> tag should be employed.</p>\n<p>That said and seeing that there are <a href=\"http://bearwithus.org/8-bears-of-the-world/\" rel=\"nofollow noreferrer\">eight species of bears in the world,</a> it would be more than appropriate to have a separate tag for each species, providing that the question is asking something particular to the nature of that species.</p>\n<p>The differences between bear species is quite amazing, but questions on the subspecies such as seen in this <a href=\"https://en.wikipedia.org/wiki/List_of_bears\" rel=\"nofollow noreferrer\">Wikipedia link</a> should remain within the scope of the bear species in question!</p>\n]", "character_repetition_ratio": {"10": 0.063, "11": 0.0421, "12": 0.0257, "13": 0.0117, "14": 0.0047, "15": 0.0023, "2": 0.296, "3": 0.2083, "4": 0.1842, "5": 0.1589, "6": 0.1278, "7": 0.107, "8": 0.0943, "9": 0.0816}, "word_repetition_ratio": {"10": 0.0, "3": 0.0, "4": 0.0, "5": 0.0, "6": 0.0, "7": 0.0, "8": 0.0, "9": 0.0}, "special_characters_ratio": 0.2020785219, "lang_id_score": 1.0, "stopwords_ratio": 0.527027027, "flagged_words_ratio": 0.0, "perplexity_score": 672.9}
3
+ {"question_id": 777, "text": "user6411: My meta question is prompted by this old question Will a tent provide some protection from bears? which resurfaced when a new answer came in. I know it is too late to differentiate on old questions, but, assuming new bear questions will be coming in, can we somehow avoid lumping brown and black bears together in questions and answers? \n\nuser8732: If questions are about bears in general, then the bears tag should be employed.\nThat said and seeing that there are eight species of bears in the world, it would be more than appropriate to have a separate tag for each species, providing that the question is asking something particular to the nature of that species.\nThe differences between bear species is quite amazing, but questions on the subspecies such as seen in this Wikipedia link should remain within the scope of the bear species in question!\n", "metadata": "[https://outdoors.meta.stackexchange.com/questions/777, https://outdoors.meta.stackexchange.com, https://outdoors.meta.stackexchange.com/users/6411/]", "date": "2016/05/15", "original_text": "[6411: <p>My meta question is prompted by this old question <a href=\"https://outdoors.stackexchange.com/questions/418/will-a-tent-provide-some-protection-from-bears\">Will a tent provide some protection from bears?</a> which resurfaced when a new answer came in. I know it is too late to differentiate on old questions, but, assuming new bear questions will be coming in, can we somehow avoid lumping brown and black bears together in questions and answers? 
</p>\n, 8732: <p>If questions are about bears in general, then the <strong>bears</strong> tag should be employed.</p>\n<p>That said and seeing that there are <a href=\"http://bearwithus.org/8-bears-of-the-world/\" rel=\"nofollow noreferrer\">eight species of bears in the world,</a> it would be more than appropriate to have a separate tag for each species, providing that the question is asking something particular to the nature of that species.</p>\n<p>The differences between bear species is quite amazing, but questions on the subspecies such as seen in this <a href=\"https://en.wikipedia.org/wiki/List_of_bears\" rel=\"nofollow noreferrer\">Wikipedia link</a> should remain within the scope of the bear species in question!</p>\n]", "character_repetition_ratio": {"10": 0.063, "11": 0.0421, "12": 0.0257, "13": 0.0117, "14": 0.0047, "15": 0.0023, "2": 0.296, "3": 0.2083, "4": 0.1842, "5": 0.1589, "6": 0.1278, "7": 0.107, "8": 0.0943, "9": 0.0816}, "word_repetition_ratio": {"10": 0.0, "3": 0.0, "4": 0.0, "5": 0.0, "6": 0.0, "7": 0.0, "8": 0.0, "9": 0.0}, "special_characters_ratio": 0.2020785219, "lang_id_score": 1.0, "stopwords_ratio": 0.527027027, "flagged_words_ratio": 0.0, "perplexity_score": 672.9}
4
+ {"question_id": 777, "text": "user6411: My meta question is prompted by this old question Will a tent provide some protection from bears? which resurfaced when a new answer came in. I know it is too late to differentiate on old questions, but, assuming new bear questions will be coming in, can we somehow avoid lumping brown and black bears together in questions and answers? \n\nuser8732: If questions are about bears in general, then the bears tag should be employed.\nThat said and seeing that there are eight species of bears in the world, it would be more than appropriate to have a separate tag for each species, providing that the question is asking something particular to the nature of that species.\nThe differences between bear species is quite amazing, but questions on the subspecies such as seen in this Wikipedia link should remain within the scope of the bear species in question!\n", "metadata": "[https://outdoors.meta.stackexchange.com/questions/777, https://outdoors.meta.stackexchange.com, https://outdoors.meta.stackexchange.com/users/6411/]", "date": "2016/05/15", "original_text": "[6411: <p>My meta question is prompted by this old question <a href=\"https://outdoors.stackexchange.com/questions/418/will-a-tent-provide-some-protection-from-bears\">Will a tent provide some protection from bears?</a> which resurfaced when a new answer came in. I know it is too late to differentiate on old questions, but, assuming new bear questions will be coming in, can we somehow avoid lumping brown and black bears together in questions and answers? 
</p>\n, 8732: <p>If questions are about bears in general, then the <strong>bears</strong> tag should be employed.</p>\n<p>That said and seeing that there are <a href=\"http://bearwithus.org/8-bears-of-the-world/\" rel=\"nofollow noreferrer\">eight species of bears in the world,</a> it would be more than appropriate to have a separate tag for each species, providing that the question is asking something particular to the nature of that species.</p>\n<p>The differences between bear species is quite amazing, but questions on the subspecies such as seen in this <a href=\"https://en.wikipedia.org/wiki/List_of_bears\" rel=\"nofollow noreferrer\">Wikipedia link</a> should remain within the scope of the bear species in question!</p>\n]", "character_repetition_ratio": {"10": 0.063, "11": 0.0421, "12": 0.0257, "13": 0.0117, "14": 0.0047, "15": 0.0023, "2": 0.296, "3": 0.2083, "4": 0.1842, "5": 0.1589, "6": 0.1278, "7": 0.107, "8": 0.0943, "9": 0.0816}, "word_repetition_ratio": {"10": 0.0, "3": 0.0, "4": 0.0, "5": 0.0, "6": 0.0, "7": 0.0, "8": 0.0, "9": 0.0}, "special_characters_ratio": 0.2020785219, "lang_id_score": 1.0, "stopwords_ratio": 0.527027027, "flagged_words_ratio": 0.0, "perplexity_score": 672.9}
5
+ {"question_id": 777, "text": "user6411: My meta question is prompted by this old question Will a tent provide some protection from bears? which resurfaced when a new answer came in. I know it is too late to differentiate on old questions, but, assuming new bear questions will be coming in, can we somehow avoid lumping brown and black bears together in questions and answers? \n\nuser8732: If questions are about bears in general, then the bears tag should be employed.\nThat said and seeing that there are eight species of bears in the world, it would be more than appropriate to have a separate tag for each species, providing that the question is asking something particular to the nature of that species.\nThe differences between bear species is quite amazing, but questions on the subspecies such as seen in this Wikipedia link should remain within the scope of the bear species in question!\n", "metadata": "[https://outdoors.meta.stackexchange.com/questions/777, https://outdoors.meta.stackexchange.com, https://outdoors.meta.stackexchange.com/users/6411/]", "date": "2016/05/15", "original_text": "[6411: <p>My meta question is prompted by this old question <a href=\"https://outdoors.stackexchange.com/questions/418/will-a-tent-provide-some-protection-from-bears\">Will a tent provide some protection from bears?</a> which resurfaced when a new answer came in. I know it is too late to differentiate on old questions, but, assuming new bear questions will be coming in, can we somehow avoid lumping brown and black bears together in questions and answers? 
</p>\n, 8732: <p>If questions are about bears in general, then the <strong>bears</strong> tag should be employed.</p>\n<p>That said and seeing that there are <a href=\"http://bearwithus.org/8-bears-of-the-world/\" rel=\"nofollow noreferrer\">eight species of bears in the world,</a> it would be more than appropriate to have a separate tag for each species, providing that the question is asking something particular to the nature of that species.</p>\n<p>The differences between bear species is quite amazing, but questions on the subspecies such as seen in this <a href=\"https://en.wikipedia.org/wiki/List_of_bears\" rel=\"nofollow noreferrer\">Wikipedia link</a> should remain within the scope of the bear species in question!</p>\n]", "character_repetition_ratio": {"10": 0.063, "11": 0.0421, "12": 0.0257, "13": 0.0117, "14": 0.0047, "15": 0.0023, "2": 0.296, "3": 0.2083, "4": 0.1842, "5": 0.1589, "6": 0.1278, "7": 0.107, "8": 0.0943, "9": 0.0816}, "word_repetition_ratio": {"10": 0.0, "3": 0.0, "4": 0.0, "5": 0.0, "6": 0.0, "7": 0.0, "8": 0.0, "9": 0.0}, "special_characters_ratio": 0.2020785219, "lang_id_score": 1.0, "stopwords_ratio": 0.527027027, "flagged_words_ratio": 0.0, "perplexity_score": 672.9}
6
+ {"question_id": 777, "text": "user6411: My meta question is prompted by this old question Will a tent provide some protection from bears? which resurfaced when a new answer came in. I know it is too late to differentiate on old questions, but, assuming new bear questions will be coming in, can we somehow avoid lumping brown and black bears together in questions and answers? \n\nuser8732: If questions are about bears in general, then the bears tag should be employed.\nThat said and seeing that there are eight species of bears in the world, it would be more than appropriate to have a separate tag for each species, providing that the question is asking something particular to the nature of that species.\nThe differences between bear species is quite amazing, but questions on the subspecies such as seen in this Wikipedia link should remain within the scope of the bear species in question!\n", "metadata": "[https://outdoors.meta.stackexchange.com/questions/777, https://outdoors.meta.stackexchange.com, https://outdoors.meta.stackexchange.com/users/6411/]", "date": "2016/05/15", "original_text": "[6411: <p>My meta question is prompted by this old question <a href=\"https://outdoors.stackexchange.com/questions/418/will-a-tent-provide-some-protection-from-bears\">Will a tent provide some protection from bears?</a> which resurfaced when a new answer came in. I know it is too late to differentiate on old questions, but, assuming new bear questions will be coming in, can we somehow avoid lumping brown and black bears together in questions and answers? 
</p>\n, 8732: <p>If questions are about bears in general, then the <strong>bears</strong> tag should be employed.</p>\n<p>That said and seeing that there are <a href=\"http://bearwithus.org/8-bears-of-the-world/\" rel=\"nofollow noreferrer\">eight species of bears in the world,</a> it would be more than appropriate to have a separate tag for each species, providing that the question is asking something particular to the nature of that species.</p>\n<p>The differences between bear species is quite amazing, but questions on the subspecies such as seen in this <a href=\"https://en.wikipedia.org/wiki/List_of_bears\" rel=\"nofollow noreferrer\">Wikipedia link</a> should remain within the scope of the bear species in question!</p>\n]", "character_repetition_ratio": {"10": 0.063, "11": 0.0421, "12": 0.0257, "13": 0.0117, "14": 0.0047, "15": 0.0023, "2": 0.296, "3": 0.2083, "4": 0.1842, "5": 0.1589, "6": 0.1278, "7": 0.107, "8": 0.0943, "9": 0.0816}, "word_repetition_ratio": {"10": 0.0, "3": 0.0, "4": 0.0, "5": 0.0, "6": 0.0, "7": 0.0, "8": 0.0, "9": 0.0}, "special_characters_ratio": 0.2020785219, "lang_id_score": 1.0, "stopwords_ratio": 0.527027027, "flagged_words_ratio": 0.0, "perplexity_score": 672.9}
7
+ {"question_id": 174, "text": "user485: I just noticed your site and I'm happy you guys made it into beta; I wish you all the best in making this a great site. \nOne nice way to let the world know about this site is via the Community Ads which are available on fully-grown Stack Exchange sites. There are a bunch of related SE sites and these ads are a potential resource for you guys to find users; for an example see this year's thread in Physics. I've gone ahead and created this image for the Physics site,\n\nInkscape SVG source\nbut if you guys can come up with something more compelling I'm pretty sure we'll be happy to host it over there. You can then post these in other SE sites and essentially ask their communities to advertise you. If you come up with good material, it can also become a good starting point for your site's graphical identity.\nJust something to think about.\n\nUpdate\nSo the Community Ads threads have been changed slightly this year to accommodate a change in the width of the sidebar. To match this, here is a new image to the new specifications:\n\nwith its corresponding Inkscape SVG source. This runs at double the standard size to provide better resolution for retina display users, whatever that actually is. This one is currently in proposed status on the 2016 Physics thread.\n\nuser106: This will be a community wiki answer to track the progress of the ads. As a reminder: A post on a site's Community Ads page needs 6 upvotes for the ad to be shown on that site. Each of our posts on those sites will be tracked here, with the score periodically updated. 
Don't forget - we can add more ads on other sites (that aren't in beta)!\nElectrical Engineering - 3\nMathematics - 3\nPhysics - 10\nProgrammers - 6\n", "metadata": "[https://engineering.meta.stackexchange.com/questions/174, https://engineering.meta.stackexchange.com, https://engineering.meta.stackexchange.com/users/485/]", "date": "2015/02/10", "original_text": "[485: <p>I just noticed your site and I'm happy you guys made it into beta; I wish you all the best in making this a great site. </p>\n\n<p>One nice way to let the world know about this site is via the Community Ads which are available on fully-grown Stack Exchange sites. There are a bunch of related SE sites and these ads are a potential resource for you guys to find users; for an example see <a href=\"https://physics.meta.stackexchange.com/questions/6388/community-promotion-ads-2015\">this year's thread in Physics</a>. I've gone ahead and created <a href=\"https://physics.meta.stackexchange.com/a/6505/8563\">this image</a> for the Physics site,</p>\n\n<p><img src=\"https://i.stack.imgur.com/yjIbH.png\" alt=\"Engineering Stack Exchange\"></p>\n\n<p><sup><a href=\"https://www.dropbox.com/s/g9ukpl8blkv8qi5/EngineeringAd.svg?dl=0\" rel=\"nofollow noreferrer\">Inkscape SVG source</a></sup></p>\n\n<p>but if you guys can come up with something more compelling I'm pretty sure we'll be happy to host it over there. You can then post these in other SE sites and essentially ask their communities to advertise you. If you come up with good material, it can also become a good starting point for your site's graphical identity.</p>\n\n<p>Just something to think about.</p>\n\n<hr>\n\n<h3>Update</h3>\n\n<p>So the Community Ads threads have been changed slightly this year to accommodate a change in the width of the sidebar. 
To match this, here is a new image to the new specifications:</p>\n\n<p><a href=\"https://engineering.stackexchange.com/\"><img src=\"https://i.stack.imgur.com/cbf0s.png\" alt=\"Engineering Stack Exchange\"></a></p>\n\n<p>with its corresponding <a href=\"https://dl.dropboxusercontent.com/u/60184856/Stack%20Exchange%20Community%20Ads/engineering%20ad%202016.svg\" rel=\"nofollow noreferrer\">Inkscape SVG source</a>. This runs at double the standard size to provide better resolution for <a href=\"https://en.wikipedia.org/wiki/Retina_Display\" rel=\"nofollow noreferrer\">retina display</a> users, whatever that actually is. This one is currently in proposed status <a href=\"https://physics.meta.stackexchange.com/a/7443/8563\">on the 2016 Physics thread</a>.</p>\n, 106: <p>This will be a community wiki answer to track the progress of the ads. As a reminder: A post on a site's Community Ads page needs 6 upvotes for the ad to be shown on that site. Each of our posts on those sites will be tracked here, with the score periodically updated. 
Don't forget - we can add more ads on other sites (that aren't in beta)!</p>\n\n<p><a href=\"https://electronics.meta.stackexchange.com/questions/4057/community-promotion-ads-2015/5123#5123\">Electrical Engineering</a> - 3</p>\n\n<p><a href=\"https://math.meta.stackexchange.com/questions/19014/community-promotion-ads-2015/19595#19595\">Mathematics</a> - 3</p>\n\n<p><a href=\"https://physics.meta.stackexchange.com/questions/6388/community-promotion-ads-2015/6505#6505\">Physics</a> - 10</p>\n\n<p><a href=\"https://softwareengineering.meta.stackexchange.com/questions/7070/community-promotion-ads-2015/7169#7169\">Programmers</a> - 6</p>\n]", "character_repetition_ratio": {"10": 0.0467, "11": 0.0349, "12": 0.0284, "13": 0.0231, "14": 0.0178, "15": 0.0124, "2": 0.2971, "3": 0.1713, "4": 0.129, "5": 0.0931, "6": 0.0725, "7": 0.0608, "8": 0.0567, "9": 0.0532}, "word_repetition_ratio": {"10": 0.0, "3": 0.0268, "4": 0.0, "5": 0.0, "6": 0.0, "7": 0.0, "8": 0.0, "9": 0.0}, "special_characters_ratio": 0.2210464433, "lang_id_score": 1.0, "stopwords_ratio": 0.488372093, "flagged_words_ratio": 0.0, "perplexity_score": 491.2}
8
+ {"question_id": 360, "text": "user255: I saw this question today, on whether hydrostatic loads are \"dead\" or live\". Very heavily related is this previous question on whether a moving load (that is always somewhere) is \"dead\" or \"live\".\nI expected to be able to close the newer question as a duplicate, because the majority of an answer needs to discuss why we talk about dead loads as opposed to live loads. But, because the actual specific of the question is different, it's not a duplicate.\nI see three possible ways forward:\n\nLeave both questions as they are, separate but related\n\nCreate a new question on \"what is a dead load vs a live load\", and make both existing questions duplicates of that\n\nCreate a new question on \"what is a dead load vs a live load\", make both existing questions related to that; allowing the answers\nin the existing questions to skip the generic dead vs live arugment,\nand only cover the specifics\n\n\nThoughts please.\n\nuser33: Not Yet\nI do think that we will need to use some of these questions as duplicate targets in the future. At the moment, they are different enough that they can stand.\nI am personally waiting for the slightly more complicated questions of what (and why) some loads go into categories other than Dead/Live.\n", "metadata": "[https://engineering.meta.stackexchange.com/questions/360, https://engineering.meta.stackexchange.com, https://engineering.meta.stackexchange.com/users/255/]", "date": "2015/10/02", "original_text": "[255: <p>I saw <a href=\"https://engineering.stackexchange.com/questions/5622/is-hydrostatic-pressure-a-dead-load-or-a-live-load\">this question</a> today, on whether hydrostatic loads are &quot;dead&quot; or live&quot;. 
Very heavily related is <a href=\"https://engineering.stackexchange.com/questions/2633/is-a-permanent-platform-that-is-hanging-and-moving-a-dead-load-or-a-live-load?rq=1\">this previous question</a> on whether a moving load (that is always <em>somewhere</em>) is &quot;dead&quot; or &quot;live&quot;.</p>\n<p>I expected to be able to close the newer question as a duplicate, because the majority of an answer needs to discuss <em>why</em> we talk about dead loads as opposed to live loads. But, because the actual specific of the question is different, it's not a duplicate.</p>\n<p>I see three possible ways forward:</p>\n<ol>\n<li><p>Leave both questions as they are, separate but <em>related</em></p>\n</li>\n<li><p>Create a new question on &quot;what is a dead load vs a live load&quot;, and make both existing questions <em>duplicates</em> of that</p>\n</li>\n<li><p>Create a new question on &quot;what is a dead load vs a live load&quot;, make both existing questions <em>related</em> to that; allowing the answers\nin the existing questions to skip the generic dead vs live arugment,\nand only cover the specifics</p>\n</li>\n</ol>\n<p>Thoughts please.</p>\n, 33: <h3>Not Yet</h3>\n<p>I do think that we will need to use some of these questions as duplicate targets in the future. 
At the moment, they are different enough that they can stand.</p>\n<p>I am personally waiting for the slightly more complicated questions of what (and why) some loads go into categories other than Dead/Live.</p>\n]", "character_repetition_ratio": {"10": 0.0769, "11": 0.0672, "12": 0.0631, "13": 0.0615, "14": 0.0599, "15": 0.0592, "2": 0.2862, "3": 0.2124, "4": 0.1767, "5": 0.1426, "6": 0.1223, "7": 0.1094, "8": 0.0997, "9": 0.0867}, "word_repetition_ratio": {"10": 0.0483, "3": 0.1402, "4": 0.1127, "5": 0.0943, "6": 0.0853, "7": 0.0762, "8": 0.067, "9": 0.0577}, "special_characters_ratio": 0.2152721365, "lang_id_score": 1.0, "stopwords_ratio": 0.5138888889, "flagged_words_ratio": 0.0, "perplexity_score": 868.5}
9
+ {"question_id": 48, "text": "user68: The Bitcoin SE site has quite a number of questions asking for explanations of news articles and historic events, possibly because Bitcoin is so commonly misunderstood, but also because there have been many events of theft. \nSometimes such questions are tough to answer, but they can also be very useful for understanding the history.\nAre questions about news/events on-topic here? \neg\n\n\"How many people got on the waiting list for a Raspberry Pi in the first week?\"\n\"How did word spread so widely about the Raspberry Pi? Was it slashdotted?\"\n\n\nuser86: As you said, news can aid the understanding of history and, therefore, why things are done the way they are. Many news articles on tech are ill-informed and vague and SE is a very good platform to seek clarification. \n\nuser35: I think we should keep it limited to software/hardware questions, and keep purchasing/supply/etc news at the source of the information - the foundation. If Raspberry Pi news is relevant in an answer, it should be included.\nOn topic:\nQ: How can I install distro X?\nA: Distro X is not currently supported, but is expected to be released mid 2013. (See the Raspberry Pi blog [here]). \nIn mid 2013, this answer should be updated to include information about how to install distro X.\nOff topic:\nQ: When is distro X going to be released?\nQ: Why isn't distro X supported?\n", "metadata": "[https://raspberrypi.meta.stackexchange.com/questions/48, https://raspberrypi.meta.stackexchange.com, https://raspberrypi.meta.stackexchange.com/users/68/]", "date": "2012/06/13", "original_text": "[68: <p>The Bitcoin SE site has quite a number of questions asking for explanations of news articles and historic events, possibly because Bitcoin is so commonly misunderstood, but also because there have been many events of theft. 
</p>\n\n<p>Sometimes such questions are tough to answer, but they can also be very useful for understanding the history.</p>\n\n<p>Are questions about news/events on-topic here? </p>\n\n<p>eg</p>\n\n<ul>\n<li>\"How many people got on the waiting list for a Raspberry Pi in the first week?\"</li>\n<li>\"How did word spread so widely about the Raspberry Pi? Was it slashdotted?\"</li>\n</ul>\n, 86: <p>As you said, news can aid the understanding of history and, therefore, why things are done the way they are. Many news articles on tech are ill-informed and vague and SE is a very good platform to seek clarification. </p>\n, 35: <p>I think we should keep it limited to software/hardware questions, and keep purchasing/supply/etc news at the source of the information - the foundation. If Raspberry Pi news is relevant in an answer, it should be included.</p>\n\n<p><strong>On topic:</strong><br>\nQ: How can I install distro X?</p>\n\n<p>A: Distro X is not currently supported, but is expected to be released mid 2013. (See the Raspberry Pi blog [here]). </p>\n\n<p>In mid 2013, this answer should be updated to include information about how to install distro X.</p>\n\n<p><strong>Off topic:</strong><br>\nQ: When is distro X going to be released?<br>\nQ: Why isn't distro X supported?</p>\n]", "character_repetition_ratio": {"10": 0.0617, "11": 0.0602, "12": 0.0491, "13": 0.035, "14": 0.0246, "15": 0.0134, "2": 0.2718, "3": 0.1582, "4": 0.1198, "5": 0.1007, "6": 0.0896, "7": 0.0808, "8": 0.072, "9": 0.0661}, "word_repetition_ratio": {"10": 0.0, "3": 0.0264, "4": 0.0, "5": 0.0, "6": 0.0, "7": 0.0, "8": 0.0, "9": 0.0}, "special_characters_ratio": 0.2265682657, "lang_id_score": 1.0, "stopwords_ratio": 0.4759825328, "flagged_words_ratio": 0.0, "perplexity_score": 501.3}
10
+ {"question_id": 688, "text": "user800: The question For a English Grammar Course, what should be included in the list below and what should be removed? was closed because, according to the three close voters, it \"needs to be more focused\". The question lists a long number of topics that would need to be covered in an envisioned English grammar course. It also describes the audience for this course: L2 learners in lower-upper secondary school who will be taking exams that focus a lot on grammar.\nThe close voters did not provide any specific reasons for their close votes, nor advice on how the question could be improved. Questions on teaching grammar are on topic on this site and I don't see how this question could be split up into smaller ones in a meaningful way, since the question is about a syllabus as a whole.\nFor these reasons, I am asking how the question can be improved. If not, I would like to see a justification for the close votes that is more meaningful than \"lacking focus\".\n\nuser3595: I am by no means an English teacher, though I do have teacher training on a different subject.\nSome things that might help:\n\nThe course is aimed for some specific exams and presumably in a specific country or region. Naming this would give context to those who know about such matters. Is the course aimed for passing the exams (and maybe learning a bit of the language) or learning the language and thereby passing the exams, too?\nIs there a lack of time when compared to content, as usual when teaching my field? How deeply is one intending to teach the content? For example, prepositions use could easily be added into example sentences that illustrate another point by adding a few words into an example sentence. 
Or one could have lessons devoted to practicing them.\nA more abstract question about how to select grammar content for such a course might also be fruitful.\n\n", "metadata": "[https://languagelearning.meta.stackexchange.com/questions/688, https://languagelearning.meta.stackexchange.com, https://languagelearning.meta.stackexchange.com/users/800/]", "date": "2020/08/08", "original_text": "[800: <p>The question <a href=\"https://languagelearning.stackexchange.com/q/4832/800\">For a English Grammar Course, what should be included in the list below and what should be removed?</a> was closed because, according to the three close voters, it &quot;needs to be more focused&quot;. The question lists a long number of topics that would need to be covered in an envisioned English grammar course. It also describes the audience for this course: L2 learners in lower-upper secondary school who will be taking exams that focus a lot on grammar.</p>\n<p>The close voters did not provide any <em>specific</em> reasons for their close votes, nor advice on how the question could be improved. Questions on teaching grammar are on topic on this site and I don't see how this question could be split up into smaller ones in a <em>meaningful</em> way, since the question is about a syllabus as a whole.</p>\n<p>For these reasons, I am asking how the question can be improved. If not, I would like to see a justification for the close votes that is more meaningful than &quot;lacking focus&quot;.</p>\n, 3595: <p>I am by no means an English teacher, though I do have teacher training on a different subject.</p>\n<p>Some things that might help:</p>\n<ol>\n<li>The course is aimed for some specific exams and presumably in a specific country or region. Naming this would give context to those who know about such matters. 
Is the course aimed for passing the exams (and maybe learning a bit of the language) or learning the language and thereby passing the exams, too?</li>\n<li>Is there a lack of time when compared to content, as usual when teaching my field? How deeply is one intending to teach the content? For example, prepositions use could easily be added into example sentences that illustrate another point by adding a few words into an example sentence. Or one could have lessons devoted to practicing them.</li>\n<li>A more abstract question about how to select grammar content for such a course might also be fruitful.</li>\n</ol>\n]", "character_repetition_ratio": {"10": 0.0552, "11": 0.052, "12": 0.0488, "13": 0.0412, "14": 0.0298, "15": 0.0206, "2": 0.2936, "3": 0.1881, "4": 0.1424, "5": 0.1123, "6": 0.0956, "7": 0.0848, "8": 0.0697, "9": 0.0606}, "word_repetition_ratio": {"10": 0.0, "3": 0.0308, "4": 0.0, "5": 0.0, "6": 0.0, "7": 0.0, "8": 0.0, "9": 0.0}, "special_characters_ratio": 0.2035541195, "lang_id_score": 1.0, "stopwords_ratio": 0.5474006116, "flagged_words_ratio": 0.0, "perplexity_score": 368.0}
11
+ {"question_id": 613, "text": "user3143: Since we accept some questions about species identification, I wonder how we should tag these.\nFor this question I created the animal-identification, but I am unsure if this is a senseful thing to do. Should we have a tag identification or rather two, animal-identification and plant-identification? Or something completely different?\n\nuser91: These tags are called 'identify-this' tags, so I would suggest following the same naming conventions as the rest of the network:\nidentify-this-animal identify-this-plant\nThis avoids any surprises when folks folks visit here from other sites. Plus, folks can easily 'favorite' or 'ignore' these tags by entering, Ignored Tags: [identify-this-*]. \nThis naming convention can also be expanded to include other areas of identification as needed:\nidentify-this-track identify-this-scat, etc, etc. \n", "metadata": "[https://outdoors.meta.stackexchange.com/questions/613, https://outdoors.meta.stackexchange.com, https://outdoors.meta.stackexchange.com/users/3143/]", "date": "2015/04/18", "original_text": "[3143: <p>Since we accept some questions about <a href=\"https://outdoors.meta.stackexchange.com/q/215/3143\">species identification</a>, I wonder how we should tag these.</p>\n\n<p>For <a href=\"https://outdoors.stackexchange.com/q/8194/3143\">this</a> question I created the <a href=\"https://outdoors.stackexchange.com/questions/tagged/animal-identification\" class=\"post-tag\" title=\"show questions tagged &#39;animal-identification&#39;\" rel=\"tag\">animal-identification</a>, but I am unsure if this is a senseful thing to do. 
Should we have a tag <a href=\"https://outdoors.stackexchange.com/questions/tagged/identification\" class=\"post-tag\" title=\"show questions tagged &#39;identification&#39;\" rel=\"tag\">identification</a> or rather two, <a href=\"https://outdoors.stackexchange.com/questions/tagged/animal-identification\" class=\"post-tag\" title=\"show questions tagged &#39;animal-identification&#39;\" rel=\"tag\">animal-identification</a> and <a href=\"https://outdoors.stackexchange.com/questions/tagged/plant-identification\" class=\"post-tag\" title=\"show questions tagged &#39;plant-identification&#39;\" rel=\"tag\">plant-identification</a>? Or something completely different?</p>\n, 91: <p>These tags are called 'identify-this' tags, so I would suggest following the same naming conventions as the rest of the network:</p>\n\n<p><a href=\"https://outdoors.stackexchange.com/questions/tagged/identify-this-animal\" class=\"post-tag\" title=\"show questions tagged &#39;identify-this-animal&#39;\" rel=\"tag\">identify-this-animal</a> <a href=\"https://outdoors.stackexchange.com/questions/tagged/identify-this-plant\" class=\"post-tag\" title=\"show questions tagged &#39;identify-this-plant&#39;\" rel=\"tag\">identify-this-plant</a></p>\n\n<p>This avoids any surprises when folks folks visit here from other sites. Plus, folks can easily 'favorite' or 'ignore' these tags by entering, <code>Ignored Tags: [identify-this-*]</code>. </p>\n\n<p>This naming convention can also be expanded to include <em>other</em> areas of identification as needed:</p>\n\n<p><a href=\"https://outdoors.stackexchange.com/questions/tagged/identify-this-track\" class=\"post-tag\" title=\"show questions tagged &#39;identify-this-track&#39;\" rel=\"tag\">identify-this-track</a> <a href=\"https://outdoors.stackexchange.com/questions/tagged/identify-this-scat\" class=\"post-tag\" title=\"show questions tagged &#39;identify-this-scat&#39;\" rel=\"tag\">identify-this-scat</a>, etc, etc. 
</p>\n]", "character_repetition_ratio": {"10": 0.1146, "11": 0.1051, "12": 0.0957, "13": 0.0838, "14": 0.0671, "15": 0.0516, "2": 0.2813, "3": 0.2189, "4": 0.2002, "5": 0.1756, "6": 0.1615, "7": 0.1463, "8": 0.1357, "9": 0.124}, "word_repetition_ratio": {"10": 0.0, "3": 0.0, "4": 0.0, "5": 0.0, "6": 0.0, "7": 0.0, "8": 0.0, "9": 0.0}, "special_characters_ratio": 0.2030696576, "lang_id_score": 1.0, "stopwords_ratio": 0.4916666667, "flagged_words_ratio": 0.0, "perplexity_score": 773.0}