rathi committed on
Commit
12f82b6
1 Parent(s): 14cb1bd

First version of genre based GPT-2 finetuned story generator

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<surrealism>": 50410, "<children's/family>": 50381, "<epic>": 50418, "<crime thriller>": 50607, "<indian western>": 50598, "<japanese movies>": 50622, "<anthology>": 50364, "<juvenile delinquency film>": 50479, "<hip hop movies>": 50469, "<hybrid western>": 50277, "<family-oriented adventure>": 50390, "<caper story>": 50523, "<feminist film>": 50392, "<roadshow theatrical release>": 50453, "<road-horror>": 50261, "<chinese movies>": 50561, "<sword and sorcery>": 50397, "<jungle film>": 50555, "<future noir>": 50283, "<beach party film>": 50326, "<thriller>": 50567, "<film>": 50484, "<environmental science>": 50431, "<patriotic film>": 50606, "<comedy of errors>": 50455, "<plague>": 50353, "<buddy cop>": 50385, "<escape film>": 50611, "<stoner film>": 50267, "<stand-up comedy>": 50556, "<health & fitness>": 50282, "<czechoslovak new wave>": 50427, "<anime>": 50306, "<heaven-can-wait fantasies>": 50473, "<animal picture>": 50297, "<satire>": 50345, "<parkour in popular culture>": 50560, "<gay themed>": 50539, "<courtroom drama>": 50337, "<political cinema>": 50304, "<erotic thriller>": 50314, "<screwball comedy>": 50344, "<natural horror films>": 50339, "<giallo>": 50590, "<prison film>": 50281, "<medical fiction>": 50583, "<sword and sorcery films>": 50301, "<filipino movies>": 50305, "<linguistics>": 50440, "<airplanes and airports>": 50308, "<road movie>": 50511, "<alien film>": 50424, "<chase movie>": 50434, "<mythological fantasy>": 50527, "<auto racing>": 50311, "<outlaw>": 50584, "<film noir>": 50391, "<point of view shot>": 50435, "<coming of age>": 50603, "<monster movie>": 50538, "<private military company>": 50553, "<pornography>": 50464, "<conspiracy fiction>": 50547, "<homoeroticism>": 50359, "<wuxia>": 50265, "<fantasy>": 50302, "<social problem film>": 50593, "<propaganda film>": 50376, "<libraries and librarians>": 50348, "<sports>": 50296, "<sci-fi thriller>": 50546, "<travel>": 50577, "<therimin music>": 50264, "<war effort>": 50388, "<computer 
animation>": 50333, "<psychological horror>": 50372, "<british new wave>": 50524, "<star vehicle>": 50292, "<comedy>": 50425, "<film & television history>": 50442, "<adventure comedy>": 50336, "<rockumentary>": 50316, "<family drama>": 50608, "<domestic comedy>": 50343, "<world history>": 50550, "<outlaw biker film>": 50319, "<heist>": 50426, "<costume horror>": 50488, "<sword and sandal>": 50609, "<cult>": 50460, "<art film>": 50284, "<essay film>": 50452, "<film adaptation>": 50389, "<doomsday film>": 50360, "<romantic comedy>": 50268, "<computers>": 50534, "<kitchen sink realism>": 50568, "<b-movie>": 50503, "<silent film>": 50585, "<steampunk>": 50586, "<haunted house film>": 50358, "<addiction drama>": 50356, "<whodunit>": 50513, "<period horror>": 50595, "<archaeology>": 50375, "<christmas movie>": 50459, "<softcore porn>": 50478, "<law & crime>": 50521, "<blaxploitation>": 50335, "<latino>": 50307, "<science fiction>": 50327, "<gothic film>": 50365, "<black-and-white>": 50552, "<feature film>": 50510, "<bloopers & candid camera>": 50594, "<religious film>": 50554, "<anti-war film>": 50379, "<school story>": 50342, "<political thriller>": 50571, "<media studies>": 50405, "<bollywood>": 50287, "<breakdance>": 50490, "<black comedy>": 50349, "<samurai cinema>": 50514, "<children's fantasy>": 50433, "<natural disaster>": 50592, "<animation>": 50273, "<PAD>": 50259, "<anthropology>": 50411, "<romantic thriller>": 50363, "<operetta>": 50496, "<stop motion>": 50394, "<musical>": 50367, "<music>": 50421, "<northern>": 50263, "<slapstick>": 50417, "<parody>": 50587, "<legal drama>": 50482, "<sex comedy>": 50494, "<buddy picture>": 50328, "<biopic [feature]>": 50575, "<horror comedy>": 50604, "<the netherlands in world war ii>": 50402, "<sci-fi adventure>": 50475, "<childhood drama>": 50531, "<statutory rape>": 50509, "<expressionism>": 50373, "<comedy horror>": 50456, "<prison escape>": 50318, "<war film>": 50262, "<fairy tale>": 50565, "<bengali cinema>": 50570, 
"<gross-out film>": 50293, "<historical documentaries>": 50438, "<action>": 50466, "<holiday film>": 50414, "<heavenly comedy>": 50581, "<z movie>": 50498, "<historical epic>": 50404, "<women in prison films>": 50429, "<buddy film>": 50461, "<period piece>": 50449, "<female buddy film>": 50613, "<revisionist western>": 50378, "<news>": 50340, "<gay pornography>": 50474, "<western>": 50543, "<fantasy drama>": 50400, "<nuclear warfare>": 50481, "<superhero>": 50412, "<clay animation>": 50506, "<costume drama>": 50526, "<neorealism>": 50600, "<dogme 95>": 50278, "<adult>": 50487, "<pornographic movie>": 50415, "<political satire>": 50535, "<instrumental music>": 50545, "<prison>": 50330, "<comedy of manners>": 50269, "<mumblecore>": 50295, "<slice of life story>": 50310, "<revenge>": 50557, "<marriage drama>": 50419, "<existentialism>": 50542, "<bruceploitation>": 50505, "<new hollywood>": 50525, "<romantic drama>": 50540, "<cyberpunk>": 50289, "<pinku eiga>": 50395, "<mystery>": 50362, "<pre-code>": 50458, "<workplace comedy>": 50351, "<revisionist fairy tale>": 50601, "<race movie>": 50286, "<sci-fi horror>": 50370, "<comedy-drama>": 50260, "<children's entertainment>": 50447, "<lgbt>": 50529, "<zombie film>": 50416, "<gender issues>": 50323, "<coming-of-age film>": 50275, "<BOS>": 50257, "<baseball>": 50439, "<camp>": 50501, "<dance>": 50533, "<splatter film>": 50517, "<roadshow/carny>": 50383, "<supernatural>": 50507, "<combat films>": 50270, "<creature film>": 50384, "<gulf war>": 50272, "<gross out>": 50266, "<space opera>": 50462, "<humour>": 50463, "<sci fi pictures original films>": 50457, "<crime fiction>": 50317, "<goat gland>": 50573, "<beach film>": 50401, "<extreme sports>": 50279, "<political documetary>": 50320, "<glamorized spy film>": 50621, "<psychological thriller>": 50465, "<comdedy>": 50612, "<world cinema>": 50500, "<backstage musical>": 50610, "<animals>": 50355, "<malayalam cinema>": 50329, "<suspense>": 50408, "<tamil cinema>": 50569, 
"<tragicomedy>": 50515, "<exploitation>": 50619, "<science fiction western>": 50432, "<political drama>": 50508, "<inspirational drama>": 50350, "<action comedy>": 50574, "<history>": 50597, "<social issues>": 50341, "<sponsored film>": 50386, "<film à clef>": 50582, "<educational>": 50615, "<ensemble film>": 50291, "<biography>": 50536, "<dystopia>": 50512, "<finance & investing>": 50493, "<werewolf fiction>": 50441, "<television movie>": 50576, "<indie>": 50454, "<kafkaesque>": 50548, "<movies about gladiators>": 50430, "<costume adventure>": 50522, "<adventure>": 50413, "<punk rock>": 50313, "<fictional film>": 50617, "<cavalry film>": 50309, "<demonic child>": 50303, "<business>": 50476, "<christian film>": 50366, "<mondo film>": 50312, "<silhouette animation>": 50580, "<tollywood>": 50530, "<detective fiction>": 50563, "<culture & society>": 50315, "<illnesses & disabilities>": 50377, "<acid western>": 50483, "<disaster>": 50380, "<crime comedy>": 50436, "<sexploitation>": 50564, "<musical drama>": 50338, "<time travel>": 50399, "<courtroom comedy>": 50361, "<archives and records>": 50280, "<chick flick>": 50322, "<teen>": 50346, "<early black cinema>": 50579, "<martial arts film>": 50352, "<action/adventure>": 50519, "<movie serial>": 50518, "<documentary>": 50599, "<british empire film>": 50446, "<anti-war>": 50477, "<americana>": 50559, "<singing cowboy>": 50562, "<animated musical>": 50369, "<family & personal relationships>": 50428, "<supermarionation>": 50566, "<fantasy comedy>": 50472, "<media satire>": 50382, "<boxing>": 50406, "<absurdism>": 50616, "<apocalyptic and post-apocalyptic fiction>": 50357, "<comedy film>": 50290, "<melodrama>": 50451, "<filipino>": 50423, "<albino bias>": 50300, "<space western>": 50486, "<jukebox musical>": 50602, "<animated cartoon>": 50558, "<live action>": 50589, "<children's issues>": 50450, "<historical drama>": 50618, "<biker film>": 50371, "<foreign legion>": 50285, "<detective>": 50354, "<horror>": 50445, 
"<inventions & innovations>": 50331, "<children's>": 50409, "<language & literature>": 50387, "<vampire movies>": 50614, "<gay>": 50276, "<master criminal films>": 50541, "<concert film>": 50497, "<spy>": 50485, "<horse racing>": 50374, "<gay interest>": 50294, "<reboot>": 50298, "<short film>": 50470, "<c-movie>": 50420, "<erotica>": 50299, "<fantasy adventure>": 50398, "<filmed play>": 50271, "<historical fiction>": 50551, "<swashbuckler films>": 50321, "<film-opera>": 50396, "<docudrama>": 50591, "<hagiography>": 50468, "<cold war>": 50443, "<family film>": 50274, "<education>": 50403, "<new queer cinema>": 50437, "<monster>": 50502, "<graphic & applied arts>": 50334, "<action thrillers>": 50492, "<journalism>": 50532, "<romance film>": 50471, "<neo-noir>": 50572, "<EOS>": 50258, "<epic western>": 50516, "<slasher>": 50332, "<musical comedy>": 50325, "<experimental film>": 50499, "<ninja movie>": 50422, "<ealing comedies>": 50444, "<interpersonal relationships>": 50407, "<superhero movie>": 50368, "<b-western>": 50537, "<comedy western>": 50347, "<alien invasion>": 50448, "<erotic drama>": 50467, "<comedy thriller>": 50324, "<gangster film>": 50393, "<tokusatsu>": 50596, "<mockumentary>": 50480, "<psycho-biddy>": 50491, "<tragedy>": 50588, "<crime>": 50288, "<crime drama>": 50549, "<fan film>": 50489, "<remake>": 50504, "<hardcore pornography>": 50495, "<biographical film>": 50528, "<drama>": 50544, "<spaghetti western>": 50578, "<avant-garde>": 50605, "<romantic fantasy>": 50620, "<nature>": 50520}
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "gradient_checkpointing": false,
12
+ "initializer_range": 0.02,
13
+ "layer_norm_epsilon": 1e-05,
14
+ "model_type": "gpt2",
15
+ "n_ctx": 1024,
16
+ "n_embd": 768,
17
+ "n_head": 12,
18
+ "n_inner": null,
19
+ "n_layer": 12,
20
+ "n_positions": 1024,
21
+ "resid_pdrop": 0.1,
22
+ "summary_activation": null,
23
+ "summary_first_dropout": 0.1,
24
+ "summary_proj_to_labels": true,
25
+ "summary_type": "cls_index",
26
+ "summary_use_proj": true,
27
+ "task_specific_params": {
28
+ "text-generation": {
29
+ "do_sample": true,
30
+ "max_length": 50
31
+ }
32
+ },
33
+ "transformers_version": "4.5.0.dev0",
34
+ "use_cache": true,
35
+ "vocab_size": 50623
36
+ }
eval_results_lm.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ perplexity = 24.264842730635394
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77ca08389b7b19df0263229de11f1cf94c2204aec1dbae0e1c39576464c6f437
3
+ size 511532667
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<BOS>", "eos_token": "<EOS>", "unk_token": "<|endoftext|>", "pad_token": "<PAD>", "additional_special_tokens": ["<comedy-drama>", "<road-horror>", "<war film>", "<northern>", "<therimin music>", "<wuxia>", "<gross out>", "<stoner film>", "<romantic comedy>", "<comedy of manners>", "<combat films>", "<filmed play>", "<gulf war>", "<animation>", "<family film>", "<coming-of-age film>", "<gay>", "<hybrid western>", "<dogme 95>", "<extreme sports>", "<archives and records>", "<prison film>", "<health & fitness>", "<future noir>", "<art film>", "<foreign legion>", "<race movie>", "<bollywood>", "<crime>", "<cyberpunk>", "<comedy film>", "<ensemble film>", "<star vehicle>", "<gross-out film>", "<gay interest>", "<mumblecore>", "<sports>", "<animal picture>", "<reboot>", "<erotica>", "<albino bias>", "<sword and sorcery films>", "<fantasy>", "<demonic child>", "<political cinema>", "<filipino movies>", "<anime>", "<latino>", "<airplanes and airports>", "<cavalry film>", "<slice of life story>", "<auto racing>", "<mondo film>", "<punk rock>", "<erotic thriller>", "<culture & society>", "<rockumentary>", "<crime fiction>", "<prison escape>", "<outlaw biker film>", "<political documetary>", "<swashbuckler films>", "<chick flick>", "<gender issues>", "<comedy thriller>", "<musical comedy>", "<beach party film>", "<science fiction>", "<buddy picture>", "<malayalam cinema>", "<prison>", "<inventions & innovations>", "<slasher>", "<computer animation>", "<graphic & applied arts>", "<blaxploitation>", "<adventure comedy>", "<courtroom drama>", "<musical drama>", "<natural horror films>", "<news>", "<social issues>", "<school story>", "<domestic comedy>", "<screwball comedy>", "<satire>", "<teen>", "<comedy western>", "<libraries and librarians>", "<black comedy>", "<inspirational drama>", "<workplace comedy>", "<martial arts film>", "<plague>", "<detective>", "<animals>", "<addiction drama>", "<apocalyptic and post-apocalyptic fiction>", "<haunted house film>", 
"<homoeroticism>", "<doomsday film>", "<courtroom comedy>", "<mystery>", "<romantic thriller>", "<anthology>", "<gothic film>", "<christian film>", "<musical>", "<superhero movie>", "<animated musical>", "<sci-fi horror>", "<biker film>", "<psychological horror>", "<expressionism>", "<horse racing>", "<archaeology>", "<propaganda film>", "<illnesses & disabilities>", "<revisionist western>", "<anti-war film>", "<disaster>", "<children's/family>", "<media satire>", "<roadshow/carny>", "<creature film>", "<buddy cop>", "<sponsored film>", "<language & literature>", "<war effort>", "<film adaptation>", "<family-oriented adventure>", "<film noir>", "<feminist film>", "<gangster film>", "<stop motion>", "<pinku eiga>", "<film-opera>", "<sword and sorcery>", "<fantasy adventure>", "<time travel>", "<fantasy drama>", "<beach film>", "<the netherlands in world war ii>", "<education>", "<historical epic>", "<media studies>", "<boxing>", "<interpersonal relationships>", "<suspense>", "<children's>", "<surrealism>", "<anthropology>", "<superhero>", "<adventure>", "<holiday film>", "<pornographic movie>", "<zombie film>", "<slapstick>", "<epic>", "<marriage drama>", "<c-movie>", "<music>", "<ninja movie>", "<filipino>", "<alien film>", "<comedy>", "<heist>", "<czechoslovak new wave>", "<family & personal relationships>", "<women in prison films>", "<movies about gladiators>", "<environmental science>", "<science fiction western>", "<children's fantasy>", "<chase movie>", "<point of view shot>", "<crime comedy>", "<new queer cinema>", "<historical documentaries>", "<baseball>", "<linguistics>", "<werewolf fiction>", "<film & television history>", "<cold war>", "<ealing comedies>", "<horror>", "<british empire film>", "<children's entertainment>", "<alien invasion>", "<period piece>", "<children's issues>", "<melodrama>", "<essay film>", "<roadshow theatrical release>", "<indie>", "<comedy of errors>", "<comedy horror>", "<sci fi pictures original films>", "<pre-code>", 
"<christmas movie>", "<cult>", "<buddy film>", "<space opera>", "<humour>", "<pornography>", "<psychological thriller>", "<action>", "<erotic drama>", "<hagiography>", "<hip hop movies>", "<short film>", "<romance film>", "<fantasy comedy>", "<heaven-can-wait fantasies>", "<gay pornography>", "<sci-fi adventure>", "<business>", "<anti-war>", "<softcore porn>", "<juvenile delinquency film>", "<mockumentary>", "<nuclear warfare>", "<legal drama>", "<acid western>", "<film>", "<spy>", "<space western>", "<adult>", "<costume horror>", "<fan film>", "<breakdance>", "<psycho-biddy>", "<action thrillers>", "<finance & investing>", "<sex comedy>", "<hardcore pornography>", "<operetta>", "<concert film>", "<z movie>", "<experimental film>", "<world cinema>", "<camp>", "<monster>", "<b-movie>", "<remake>", "<bruceploitation>", "<clay animation>", "<supernatural>", "<political drama>", "<statutory rape>", "<feature film>", "<road movie>", "<dystopia>", "<whodunit>", "<samurai cinema>", "<tragicomedy>", "<epic western>", "<splatter film>", "<movie serial>", "<action/adventure>", "<nature>", "<law & crime>", "<costume adventure>", "<caper story>", "<british new wave>", "<new hollywood>", "<costume drama>", "<mythological fantasy>", "<biographical film>", "<lgbt>", "<tollywood>", "<childhood drama>", "<journalism>", "<dance>", "<computers>", "<political satire>", "<biography>", "<b-western>", "<monster movie>", "<gay themed>", "<romantic drama>", "<master criminal films>", "<existentialism>", "<western>", "<drama>", "<instrumental music>", "<sci-fi thriller>", "<conspiracy fiction>", "<kafkaesque>", "<crime drama>", "<world history>", "<historical fiction>", "<black-and-white>", "<private military company>", "<religious film>", "<jungle film>", "<stand-up comedy>", "<revenge>", "<animated cartoon>", "<americana>", "<parkour in popular culture>", "<chinese movies>", "<singing cowboy>", "<detective fiction>", "<sexploitation>", "<fairy tale>", "<supermarionation>", "<thriller>", 
"<kitchen sink realism>", "<tamil cinema>", "<bengali cinema>", "<political thriller>", "<neo-noir>", "<goat gland>", "<action comedy>", "<biopic [feature]>", "<television movie>", "<travel>", "<spaghetti western>", "<early black cinema>", "<silhouette animation>", "<heavenly comedy>", "<film à clef>", "<medical fiction>", "<outlaw>", "<silent film>", "<steampunk>", "<parody>", "<tragedy>", "<live action>", "<giallo>", "<docudrama>", "<natural disaster>", "<social problem film>", "<bloopers & candid camera>", "<period horror>", "<tokusatsu>", "<history>", "<indian western>", "<documentary>", "<neorealism>", "<revisionist fairy tale>", "<jukebox musical>", "<coming of age>", "<horror comedy>", "<avant-garde>", "<patriotic film>", "<crime thriller>", "<family drama>", "<sword and sandal>", "<backstage musical>", "<escape film>", "<comdedy>", "<female buddy film>", "<vampire movies>", "<educational>", "<absurdism>", "<fictional film>", "<historical drama>", "<exploitation>", "<romantic fantasy>", "<glamorized spy film>", "<japanese movies>"]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c7a3929e54e245d379a778734d5cb0dd4c18dcf41a0b56b420846c16c6bc767
3
+ size 2287
vocab.json ADDED
The diff for this file is too large to render. See raw diff