FrameNet (fulltext)
corpuses and documents
number of corpuses
-- Row count of fncorpuses.
SELECT COUNT(*)
FROM fncorpuses
COUNT(*)
16
number of documents
-- Row count of fndocuments.
SELECT COUNT(*)
FROM fndocuments
COUNT(*)
2392
distribution of documents per corpus
-- One row per corpusid: the corpus column and the number of joined rows,
-- ordered by corpus.
-- NOTE(review): COUNT(*) over a LEFT JOIN yields 1, not 0, for a corpus that has
-- no matching fndocuments row.
SELECT
    corpus,
    COUNT(*)
FROM fncorpuses
LEFT JOIN fndocuments USING (corpusid)
GROUP BY corpusid
ORDER BY corpus
corpus | COUNT(*)
ANC | 13
ANC_r | 51
AP | 1460
BNC2 | 750
BNC2_cxn | 1
C-4 | 1
KBEval | 9
LUCorpus-v0.3 | 20
MASC | 36
Miscellaneous | 3
NTI | 20
NYTIMES | 19
Orwell | 1
Pickett | 1
PropBank | 6
SemAnno | 1
distribution of sentences per document
-- The twenty documents with the most sentences, largest first.
-- Counts s.documentid rather than COUNT(*): the LEFT JOIN emits one NULL-extended
-- row for a document without sentences, which COUNT(*) would report as 1 instead of 0.
-- NOTE(review): ties on c have no tie-breaker, so their relative order is not guaranteed.
SELECT
    d.corpusid,
    s.corpusid,
    documentdesc,
    COUNT(s.documentid) AS c
FROM fndocuments AS d
LEFT JOIN fnsentences AS s USING (documentid)
GROUP BY documentid
ORDER BY c DESC
LIMIT 20
corpusid | corpusid | documentdesc | c
111 | 111 | bncp | 25516
194 | 194 | WhereToHongKong | 455
164 | 164 | IranRelatedQuestions | 259
174 | 174 | Hound-Ch14 | 258
184 | 184 | parc | 172
204 | 204 | enron-thread-159550 | 151
194 | 194 | HistoryOfLasVegas | 145
134 | 134 | Iran_Chemical | 136
134 | 134 | Iran_Missile | 126
194 | 194 | HistoryOfGreece | 125
194 | 194 | HistoryOfJerusalem | 121
134 | 134 | NorthKorea_NuclearOverview | 119
184 | 184 | atm | 115
113 | 113 | apwsE950310.0153 | 113
134 | 134 | Iran_Nuclear | 101
150 | 150 | justify | 99
150 | 150 | player | 98
150 | 150 | curious | 98
204 | 204 | IZ-060316-01-Trans-1 | 98
150 | 150 | familiar | 97
distribution of sentences and documents per corpus
-- Per corpus: summed per-document counts (nsentences), number of documents, and the
-- first 80 characters of the sorted, concatenated document names; rows are ordered by
-- nsentences, largest first.
-- The derived table per_doc holds one row per document with its joined-row count c.
-- NOTE(review): c comes from COUNT(*) over a LEFT JOIN, so a document without
-- sentences contributes 1 to nsentences rather than 0 — confirm this is intended.
SELECT
    corpus,
    SUM(c) AS nsentences,
    COUNT(documentdesc) AS ndocuments,
    SUBSTRING(GROUP_CONCAT(documentdesc ORDER BY documentdesc),1,80)
FROM (
    SELECT
        d.corpusid,
        documentdesc,
        COUNT(*) AS c
    FROM fndocuments AS d
    LEFT JOIN fnsentences AS s USING (documentid)
    GROUP BY documentid
) AS per_doc
LEFT JOIN fncorpuses USING (corpusid)
GROUP BY per_doc.corpusid
ORDER BY nsentences DESC
corpus | nsentences | ndocuments | SUBSTRING(GROUP_CONCAT(documentdesc ORDER BY documentdesc),1,80)
BNC2 | 26541 | 750 | bncp,bncp=10,bncp=1004,bncp=1015,bncp=1017,bncp=1018,bncp=1023,bncp=1024,bncp=10
MASC | 2415 | 36 | add,chance,color,cool,curious,date,entitle,exercisen,fair,familiar,forget,image,
AP | 1684 | 1460 | apwsE941111.0319,apwsE941111.0431,apwsE941112.0096,apwsE941113.0042,apwsE941113.
NTI | 1230 | 20 | BWTutorial_chapter1,ChinaOverview,Iran_Biological,Iran_Chemical,Iran_Introductio
ANC | 1192 | 13 | 110CYL067,110CYL068,110CYL072,110CYL200,112C-L013,EntrepreneurAsMadonna,HistoryO
KBEval | 873 | 9 | atm,Brandeis,cycorp,LCC-M,lcch,MIT,parc,Stanford,utd-icsi
LUCorpus-v0.3 | 770 | 20 | 20000410_nyt-NEW,20000415_apw_eng-NEW,20000416_xin_eng-NEW,20000419_apw_eng-NEW,
ANC_r | 309 | 51 | IranRelatedQuestions,written_1%fiction%eggan%TheStory,written_1%fiction%hargrave
PropBank | 284 | 6 | AetnaLifeAndCasualty,BellRinging,ElectionVictory,LomaPrieta,PolemicProgressiveEd
Miscellaneous | 270 | 3 | Hijack,Hound-Ch14,SadatAssassination
Orwell | 19 | 1 | Orwell_1984_p1
NYTIMES | 19 | 19 | apwsE980302.0253,apwsE980303.0199,apwsE980303.0902,apwsE980305.0005,apwsE980305.
C-4 | 11 | 1 | C-4Text
SemAnno | 6 | 1 | Text1
Pickett | 5 | 1 | cjf001
BNC2_cxn | 2 | 1 | rate.speed_080313