Reputation: 982
How do you calculate the cut-off of a rank-size distribution, i.e. the x that splits the distribution so that the top x% of categories contain (100 − x)% of the observations? For example, if the cut-off is 20%, the top 20% of categories contain 80% of the observations.
In the sample below, the categories are `cbsa_code`, the frequency is `Freq`, and the category rank by frequency is `rank` (some ranks are equal and fractional because they were calculated with `ties.method = "average"`):
structure(list(cbsa_code = c("35620", "41860", "31080", "41940",
"14460", "47900", "16980", "42660", "33100", "19100", "37980",
"12420", "12060", "41740", "26420", "38900", "38060", "19740",
"33460", "29820", "19820", "36740", "45300", "14500", "14860",
"12580", "17140", "40900", "38300", "41620", "28140", "34980",
"41180", "20500", "39580", "16740", "17460", "40140", "18140",
"39340", "41700", "27260", "33340", "26900", "31540", "10580",
"11460", "32820", "35380", "39300", "35300", "42100", "12540",
"36420", "40060", "45940", "46140", "25540", "31140", "40380",
"42200", "46520", "15380", "24340", "35840", "47260", "46060",
"17820", "49340", "31700", "36540", "37100", "10420", "15980",
"16700", "19780", "30780", "42220", "10740", "22660", "23540",
"28940", "39900", "13820", "16580", "16860", "14260", "16940",
"24860", "29620", "34940", "45780", "16820", "43620", "11700",
"23420", "25420", "34820", "41500", "12940", "22220", "26620",
"33860", "41540", "44060", "48900", "11260", "14740", "20100",
"21660", "27060", "10900", "14580", "15540", "17900", "21340",
"24660", "29540", "37340", "38860", "44140", "45220", "19380",
"19660", "26980", "29940", "30460", "36260", "42020", "42340",
"46340", "11540", "12020", "13380", "17020", "17200", "18700",
"24580", "24780", "25500", "25900", "26820", "27540", "27940",
"32780", "34900", "35980", "37860", "42540", "43900", "44300",
"45060", "46700", "48620", "11100", "12220", "12300", "13460",
"18180", "19340", "20780", "22140", "27140", "27220", "28420",
"28700", "28740", "30700", "33260", "33780", "34580", "36860",
"39460", "41420", "44700", "47380", "48660", "49180", "12700",
"13100", "13780", "13980", "17660", "18580", "20020", "20260",
"21140", "21500", "23060", "23860", "24060", "25180", "25200",
"25860", "26380", "26500", "26740", "27980", "29340", "29460",
"31060", "36100", "36780", "37460", "38940", "40420", "42140",
"43780", "44180", "44500", "45860", "46660", "48700", "49620",
"10780", "11900", "12260", "13220", "13900", "14100", "14540",
"15940", "16180", "16300", "17340", "17780", "17860", "17980",
"18660", "20140", "20220", "20420", "20700", "21260", "22020",
"22500", "22520", "22900", "23180", "23580", "23820", "24300",
"24420", "24540", "24900", "24940", "25060", "25940", "28020",
"28060", "28580", "29300", "29660", "30020", "30140", "30420",
"30860", "30980", "31180", "31340", "31900", "32140", "32580",
"33140", "33220", "33540", "36140", "36500", "36700", "36980",
"37900", "38540", "39140", "39420", "39540", "39660", "39740",
"40660", "41140", "42700", "43060", "43100", "43380", "43580",
"44100", "44260", "44920", "45460", "45900", "46020", "46540",
"47460", "49300", "49420", "10100", "10140", "10460", "10620",
"10700", "11500", "11740", "11820", "12620", "12740", "12900",
"12980", "13140", "13180", "13740", "14010", "14140", "14380",
"14700", "15220", "15260", "15500", "15620", "15740", "16100",
"16220", "16540", "17220", "17300", "17580", "17740", "18020",
"18220", "18260", "19060", "19140", "19180", "19300", "19580",
"20380", "20660", "20740", "20820", "21220", "21540", "21580",
"21700", "21780", "21840", "21900", "22180", "22420", "23300",
"23460", "23660", "23900", "24020", "24220", "24260", "24620",
"24640", "25620", "26780", "26860", "27420", "27460", "27900",
"28100", "28380", "28660", "28780", "29060", "29100", "29200",
"29380", "29700", "29980", "30300", "30340", "31020", "31300",
"31380", "31940", "32220", "32260", "32380", "33700", "33740",
"34100", "34340", "34460", "34620", "34700", "34780", "34860",
"35460", "35740", "36460", "36940", "37120", "38240", "38380",
"38820", "39020", "39060", "39820", "39940", "39980", "40220",
"40260", "40340", "40860", "40980", "41060", "41820", "42820",
"42860", "42940", "43140", "43260", "43340", "43420", "43740",
"44220", "44420", "44460", "44660", "45180", "45340", "45380",
"45620", "45820", "46220", "46300", "46740", "46980", "47180",
"47220", "47240", "47300", "47620", "47700", "47940", "48020",
"48060", "48140", "48260", "48580", "48780", "49080", "49220",
"49660", "10180", "10220", "10300", "10500", "10540", "10660",
"10820", "10860", "10940", "10980", "11020", "11060", "11140",
"11180", "11220", "11380", "11420", "11580", "11620", "11660",
"11680", "11780", "11860", "11940", "11980", "12100", "12140",
"12180", "12380", "12460", "12660", "12680", "12780", "12820",
"12860", "13020", "13060", "13260", "13300", "13340", "13420",
"13500", "13540", "13620", "13660", "13700", "13720", "13940",
"14020", "14180", "14220", "14340", "14420", "14620", "14660",
"14720", "14780", "14820", "15020", "15060", "15100", "15180",
"15340", "15420", "15460", "15580", "15660", "15680", "15700",
"15780", "15820", "15860", "15900", "16020", "16060", "16260",
"16340", "16380", "16460", "16500", "16620", "16660", "17060",
"17260", "17380", "17420", "17500", "17540", "17700", "18060",
"18100", "18300", "18380", "18420", "18460", "18500", "18620",
"18740", "18780", "18820", "18860", "18880", "18900", "18980",
"19000", "19220", "19260", "19420", "19460", "19500", "19540",
"19620", "19700", "19760", "19860", "19940", "19980", "20060",
"20180", "20300", "20340", "20460", "20540", "20580", "20900",
"20940", "20980", "21020", "21060", "21120", "21180", "21300",
"21380", "21420", "21460", "21740", "21820", "21980", "22060",
"22100", "22260", "22280", "22300", "22340", "22380", "22540",
"22580", "22620", "22700", "22780", "22800", "22820", "22860",
"23140", "23240", "23340", "23380", "23500", "23620", "23700",
"23780", "23940", "23980", "24100", "24140", "24380", "24460",
"24500", "24700", "24740", "24820", "24980", "25100", "25220",
"25260", "25300", "25460", "25580", "25700", "25720", "25740",
"25760", "25780", "25820", "25840", "25880", "25980", "26020",
"26090", "26140", "26220", "26300", "26340", "26460", "26540",
"26580", "26660", "26700", "26940", "26960", "27020", "27100",
"27160", "27180", "27300", "27340", "27380", "27500", "27600",
"27620", "27700", "27740", "27780", "27860", "27920", "28180",
"28260", "28300", "28340", "28500", "28540", "28620", "28820",
"28860", "28900", "29020", "29180", "29260", "29420", "29500",
"29740", "29780", "29860", "29900", "30060", "30220", "30260",
"30280", "30380", "30580", "30620", "30660", "30820", "30880",
"30900", "30940", "31220", "31260", "31420", "31460", "31500",
"31580", "31620", "31660", "31680", "31740", "31820", "31860",
"31930", "31980", "32000", "32020", "32100", "32180", "32280",
"32300", "32340", "32460", "32500", "32540", "32620", "32660",
"32700", "32740", "32860", "32900", "32940", "32980", "33020",
"33060", "33180", "33300", "33420", "33500", "33580", "33620",
"33660", "33940", "33980", "34020", "34060", "34140", "34180",
"34220", "34260", "34300", "34380", "34420", "34500", "34540",
"34660", "34740", "35020", "35060", "35100", "35140", "35220",
"35260", "35420", "35440", "35500", "35580", "35660", "35700",
"35820", "35860", "35900", "35940", "36020", "36220", "36300",
"36340", "36380", "36580", "36620", "36660", "36820", "36830",
"36840", "36900", "37020", "37060", "37080", "37140", "37220",
"37260", "37300", "37420", "37500", "37540", "37580", "37620",
"37660", "37740", "37780", "37940", "38100", "38180", "38220",
"38260", "38340", "38420", "38460", "38500", "38580", "38620",
"38700", "38740", "38780", "38840", "38920", "39220", "39260",
"39380", "39500", "39700", "39780", "39860", "40080", "40100",
"40180", "40300", "40460", "40540", "40580", "40620", "40700",
"40740", "40780", "40820", "40940", "41100", "41220", "41400",
"41460", "41660", "41760", "41780", "42300", "42380", "42420",
"42460", "42620", "42680", "42740", "42780", "42900", "42980",
"43020", "43180", "43220", "43300", "43320", "43460", "43500",
"43660", "43700", "43760", "43940", "43980", "44020", "44340",
"44540", "44580", "44620", "44740", "44780", "44860", "44900",
"44940", "44980", "45000", "45020", "45140", "45500", "45520",
"45540", "45580", "45660", "45700", "45740", "45980", "46100",
"46180", "46380", "46460", "46500", "46620", "46780", "46820",
"46860", "46900", "47020", "47080", "47340", "47420", "47540",
"47580", "47660", "47780", "47820", "47920", "47980", "48100",
"48180", "48220", "48300", "48460", "48540", "48820", "48940",
"48980", "49020", "49100", "49260", "49380", "49460", "49700",
"49740", "49780", "49820"), Freq = c(1812L, 1558L, 1052L, 622L,
514L, 455L, 395L, 393L, 311L, 266L, 261L, 259L, 249L, 213L, 204L,
156L, 151L, 141L, 95L, 92L, 91L, 91L, 84L, 76L, 71L, 70L, 68L,
66L, 64L, 64L, 61L, 59L, 52L, 50L, 46L, 45L, 44L, 44L, 40L, 38L,
38L, 36L, 35L, 34L, 32L, 31L, 30L, 30L, 29L, 28L, 27L, 26L, 25L,
25L, 25L, 24L, 23L, 21L, 21L, 21L, 21L, 21L, 20L, 20L, 20L, 20L,
19L, 17L, 17L, 16L, 16L, 16L, 15L, 15L, 15L, 15L, 15L, 15L, 14L,
14L, 14L, 14L, 14L, 13L, 13L, 13L, 12L, 12L, 12L, 12L, 12L, 12L,
11L, 11L, 10L, 10L, 10L, 10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
8L, 8L, 8L, 8L, 8L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), rank = c(1, 2, 3,
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21.5, 21.5, 23, 24, 25, 26, 27, 28, 29.5, 29.5, 31, 32, 33, 34,
35, 36, 37.5, 37.5, 39, 40.5, 40.5, 42, 43, 44, 45, 46, 47.5,
47.5, 49, 50, 51, 52, 54, 54, 54, 56, 57, 60, 60, 60, 60, 60,
64.5, 64.5, 64.5, 64.5, 67, 68.5, 68.5, 71, 71, 71, 75.5, 75.5,
75.5, 75.5, 75.5, 75.5, 81, 81, 81, 81, 81, 85, 85, 85, 89.5,
89.5, 89.5, 89.5, 89.5, 89.5, 93.5, 93.5, 97, 97, 97, 97, 97,
103, 103, 103, 103, 103, 103, 103, 109, 109, 109, 109, 109, 117,
117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 127, 127, 127,
127, 127, 127, 127, 127, 127, 143, 143, 143, 143, 143, 143, 143,
143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143,
143, 143, 143, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 196.5,
196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 196.5,
196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 196.5,
196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 196.5,
196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 196.5, 254.5,
254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5,
254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5,
254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5,
254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5,
254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5,
254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5,
254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5,
254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5,
254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 254.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 370.5,
370.5, 370.5, 370.5, 370.5, 370.5, 370.5, 447, 448, 449, 450,
451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463,
464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476,
477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489,
490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502,
503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515,
516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528,
529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541,
542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554,
555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567,
568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580,
581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593,
594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606,
607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619,
620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632,
633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645,
646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658,
659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671,
672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684,
685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697,
698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710,
711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723,
724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736,
737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749,
750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762,
763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775,
776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788,
789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801,
802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814,
815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827,
828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840,
841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853,
854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866,
867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879,
880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892,
893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905,
906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917)), .Names = c("cbsa_code",
"Freq", "rank"), row.names = c(597L, 742L, 488L, 743L, 103L,
878L, 159L, 756L, 539L, 207L, 654L, 54L, 46L, 738L, 378L, 676L,
655L, 223L, 547L, 459L, 226L, 623L, 820L, 104L, 114L, 57L, 162L,
722L, 661L, 735L, 422L, 583L, 728L, 242L, 691L, 155L, 170L, 704L,
185L, 685L, 737L, 401L, 545L, 390L, 499L, 10L, 30L, 532L, 591L,
684L, 590L, 745L, 56L, 615L, 701L, 836L, 841L, 356L, 489L, 710L,
747L, 849L, 122L, 328L, 602L, 865L, 839L, 178L, 908L, 504L, 618L,
635L, 6L, 138L, 154L, 225L, 479L, 748L, 14L, 293L, 310L, 440L,
698L, 89L, 151L, 157L, 99L, 158L, 342L, 454L, 582L, 832L, 156L,
781L, 37L, 307L, 353L, 579L, 733L, 67L, 281L, 383L, 556L, 734L,
791L, 898L, 27L, 111L, 232L, 268L, 395L, 18L, 106L, 126L, 180L,
261L, 337L, 453L, 641L, 675L, 793L, 819L, 214L, 221L, 393L, 462L,
474L, 611L, 744L, 750L, 845L, 32L, 45L, 78L, 160L, 163L, 197L,
334L, 340L, 355L, 368L, 388L, 408L, 417L, 531L, 581L, 606L, 651L,
754L, 787L, 797L, 816L, 853L, 893L, 23L, 50L, 52L, 80L, 186L,
213L, 248L, 279L, 397L, 400L, 428L, 434L, 435L, 478L, 543L, 555L,
573L, 628L, 688L, 731L, 806L, 868L, 894L, 904L, 61L, 71L, 88L,
92L, 174L, 194L, 230L, 236L, 256L, 265L, 300L, 317L, 322L, 348L,
349L, 366L, 377L, 380L, 386L, 418L, 448L, 451L, 487L, 608L, 624L,
643L, 678L, 711L, 746L, 786L, 794L, 801L, 834L, 852L, 895L, 912L,
15L, 42L, 51L, 74L, 90L, 95L, 105L, 137L, 142L, 145L, 167L, 177L,
179L, 181L, 196L, 233L, 235L, 240L, 246L, 259L, 276L, 288L, 289L,
299L, 302L, 311L, 316L, 327L, 330L, 333L, 343L, 344L, 346L, 369L,
419L, 420L, 431L, 447L, 455L, 464L, 466L, 473L, 481L, 485L, 490L,
494L, 508L, 515L, 526L, 540L, 542L, 549L, 609L, 617L, 622L, 631L,
652L, 667L, 681L, 687L, 690L, 692L, 694L, 716L, 727L, 758L, 767L,
768L, 776L, 780L, 792L, 796L, 811L, 823L, 835L, 838L, 850L, 870L,
907L, 910L, 1L, 2L, 7L, 11L, 13L, 31L, 38L, 40L, 58L, 62L, 66L,
68L, 72L, 73L, 87L, 93L, 96L, 101L, 109L, 119L, 120L, 125L, 128L,
132L, 141L, 143L, 150L, 164L, 166L, 173L, 176L, 182L, 187L, 188L,
206L, 208L, 209L, 212L, 219L, 239L, 245L, 247L, 249L, 258L, 266L,
267L, 269L, 271L, 273L, 274L, 280L, 287L, 304L, 308L, 313L, 318L,
321L, 325L, 326L, 335L, 336L, 358L, 387L, 389L, 405L, 406L, 415L,
421L, 427L, 433L, 436L, 442L, 443L, 445L, 449L, 456L, 463L, 470L,
471L, 486L, 493L, 495L, 510L, 517L, 518L, 522L, 553L, 554L, 561L,
567L, 570L, 574L, 576L, 578L, 580L, 594L, 600L, 616L, 630L, 636L,
659L, 663L, 673L, 679L, 680L, 696L, 699L, 700L, 706L, 707L, 709L,
721L, 724L, 725L, 741L, 761L, 762L, 764L, 769L, 772L, 775L, 777L,
784L, 795L, 799L, 800L, 805L, 818L, 821L, 822L, 828L, 833L, 843L,
844L, 854L, 859L, 862L, 863L, 864L, 866L, 873L, 875L, 880L, 882L,
883L, 885L, 888L, 892L, 896L, 902L, 905L, 913L, 3L, 4L, 5L, 8L,
9L, 12L, 16L, 17L, 19L, 20L, 21L, 22L, 24L, 25L, 26L, 28L, 29L,
33L, 34L, 35L, 36L, 39L, 41L, 43L, 44L, 47L, 48L, 49L, 53L, 55L,
59L, 60L, 63L, 64L, 65L, 69L, 70L, 75L, 76L, 77L, 79L, 81L, 82L,
83L, 84L, 85L, 86L, 91L, 94L, 97L, 98L, 100L, 102L, 107L, 108L,
110L, 112L, 113L, 115L, 116L, 117L, 118L, 121L, 123L, 124L, 127L,
129L, 130L, 131L, 133L, 134L, 135L, 136L, 139L, 140L, 144L, 146L,
147L, 148L, 149L, 152L, 153L, 161L, 165L, 168L, 169L, 171L, 172L,
175L, 183L, 184L, 189L, 190L, 191L, 192L, 193L, 195L, 198L, 199L,
200L, 201L, 202L, 203L, 204L, 205L, 210L, 211L, 215L, 216L, 217L,
218L, 220L, 222L, 224L, 227L, 228L, 229L, 231L, 234L, 237L, 238L,
241L, 243L, 244L, 250L, 251L, 252L, 253L, 254L, 255L, 257L, 260L,
262L, 263L, 264L, 270L, 272L, 275L, 277L, 278L, 282L, 283L, 284L,
285L, 286L, 290L, 291L, 292L, 294L, 295L, 296L, 297L, 298L, 301L,
303L, 305L, 306L, 309L, 312L, 314L, 315L, 319L, 320L, 323L, 324L,
329L, 331L, 332L, 338L, 339L, 341L, 345L, 347L, 350L, 351L, 352L,
354L, 357L, 359L, 360L, 361L, 362L, 363L, 364L, 365L, 367L, 370L,
371L, 372L, 373L, 374L, 375L, 376L, 379L, 381L, 382L, 384L, 385L,
391L, 392L, 394L, 396L, 398L, 399L, 402L, 403L, 404L, 407L, 409L,
410L, 411L, 412L, 413L, 414L, 416L, 423L, 424L, 425L, 426L, 429L,
430L, 432L, 437L, 438L, 439L, 441L, 444L, 446L, 450L, 452L, 457L,
458L, 460L, 461L, 465L, 467L, 468L, 469L, 472L, 475L, 476L, 477L,
480L, 482L, 483L, 484L, 491L, 492L, 496L, 497L, 498L, 500L, 501L,
502L, 503L, 505L, 506L, 507L, 509L, 511L, 512L, 513L, 514L, 516L,
519L, 520L, 521L, 523L, 524L, 525L, 527L, 528L, 529L, 530L, 533L,
534L, 535L, 536L, 537L, 538L, 541L, 544L, 546L, 548L, 550L, 551L,
552L, 557L, 558L, 559L, 560L, 562L, 563L, 564L, 565L, 566L, 568L,
569L, 571L, 572L, 575L, 577L, 584L, 585L, 586L, 587L, 588L, 589L,
592L, 593L, 595L, 596L, 598L, 599L, 601L, 603L, 604L, 605L, 607L,
610L, 612L, 613L, 614L, 619L, 620L, 621L, 625L, 626L, 627L, 629L,
632L, 633L, 634L, 637L, 638L, 639L, 640L, 642L, 644L, 645L, 646L,
647L, 648L, 649L, 650L, 653L, 656L, 657L, 658L, 660L, 662L, 664L,
665L, 666L, 668L, 669L, 670L, 671L, 672L, 674L, 677L, 682L, 683L,
686L, 689L, 693L, 695L, 697L, 702L, 703L, 705L, 708L, 712L, 713L,
714L, 715L, 717L, 718L, 719L, 720L, 723L, 726L, 729L, 730L, 732L,
736L, 739L, 740L, 749L, 751L, 752L, 753L, 755L, 757L, 759L, 760L,
763L, 765L, 766L, 770L, 771L, 773L, 774L, 778L, 779L, 782L, 783L,
785L, 788L, 789L, 790L, 798L, 802L, 803L, 804L, 807L, 808L, 809L,
810L, 812L, 813L, 814L, 815L, 817L, 824L, 825L, 826L, 827L, 829L,
830L, 831L, 837L, 840L, 842L, 846L, 847L, 848L, 851L, 855L, 856L,
857L, 858L, 860L, 861L, 867L, 869L, 871L, 872L, 874L, 876L, 877L,
879L, 881L, 884L, 886L, 887L, 889L, 890L, 891L, 897L, 899L, 900L,
901L, 903L, 906L, 909L, 911L, 914L, 915L, 916L, 917L), class = "data.frame")
EDIT: The provided ranks should be disregarded, as `ties.method = "average"` is the wrong method to use in this case.
Upvotes: 2
Views: 893
Reputation: 982
Based on the very nice solution by @JorysMeys, here is a leaner solution, which only requires the frequency distribution as an input and which returns the matching share of observations in addition to the cutoff (because the sum will seldom be 100%).
# Find the cutoff of a rank-size distribution: the share x of top-ranked
# categories that together hold (approximately) a share 1 - x of all
# observations.
#
# Args:
#   freq: numeric vector of category frequencies, in any order. NA entries
#     are dropped (make sure NA doesn't mean zero before running the
#     function).
#
# Returns:
#   A numeric vector of length 2: the cutoff as a share of categories, and
#   the matching cumulative share of observations held by those categories.
#   The two generally do not sum to exactly 1 because ranks are discrete.
#   Both elements are NA when no frequencies remain after dropping NAs or
#   when the frequencies sum to zero.
get_cutoff <- function(freq) {
  # remove NA values; work in double precision so that large integer counts
  # cannot overflow in sum() / cumsum()
  freq <- as.numeric(freq[!is.na(freq)])
  n <- length(freq)
  total <- sum(freq)
  # the cutoff is undefined without any observations: return NA instead of
  # failing further down on an empty index
  if (n == 0L || total == 0) {
    return(c(NA_real_, NA_real_))
  }
  # order distribution by decreasing frequency
  freq <- sort(freq, decreasing = TRUE)
  # subtract 100% from cumulative frequency share plus rank share
  pdiff <- cumsum(freq / total) + seq_len(n) / n - 1
  # position (= rank) of smallest absolute difference (generally not 0 since
  # ranks are discrete); which.min() picks the first one on ties
  pos <- which.min(abs(pdiff))
  # cutoff of rank share and matching cumulative frequency share
  c(pos / n, sum(freq[seq_len(pos)]) / total)
}
There is no need to calculate ranks beforehand because the cutoff should not depend on the `ties.method` argument of `rank()`. If you do calculate ranks beforehand and apply the other solution, you should use `ties.method = "random"`; other methods will give uninterpretable results. This solution simply assigns consecutive ranks (1, 2, …, n) to the sorted frequencies, which for the purpose of the cutoff is equivalent to `ties.method = "random"`: tied categories have equal frequencies, so the order among them does not matter.
Upvotes: 0
Reputation: 108583
I reckon that the top x% of categories are defined by the rank, so that the top x% of categories are the categories with a rank of at most `max(rank)*x/100`. Then it's actually quite easy to do.
In R code this could be done like this :
# Rank-based cutoff of a rank-size distribution.
#
# Args:
#   rank: rank of each category (ties allowed, fractional ranks allowed).
#   freq: frequency of each category, parallel to `rank`; NA frequencies
#     are ignored when summing.
#
# Returns:
#   The rank share (rank / largest rank) at which the cumulative frequency
#   share plus the rank share is closest to 1.
get_cutoff <- function(rank, freq) {
  # total frequency per distinct rank value; tied categories are pooled
  totals <- tapply(freq, rank, sum, na.rm = TRUE)
  # the distinct rank values come back as the names of the tapply() result
  rank_values <- as.numeric(names(totals))

  freq_share <- cumsum(totals / sum(totals))
  rank_share <- rank_values / max(rank_values)

  # distance from the ideal split, where both shares add up to exactly 1
  gap <- abs(freq_share + rank_share - 1)
  best <- which.min(gap)

  rank_values[best] / max(rank_values)
}
Storing your structure in a data frame called `mydf` gives the following:
> get_cutoff(mydf$rank, mydf$Freq)
[1] 0.08833152
To check for yourself this is correct, you can do:
> counts <- with(mydf, tapply(Freq, rank, sum, na.rm = TRUE))
> ranks <- as.numeric(names(counts))
> get_cutoff(mydf$rank, mydf$Freq) * max(ranks)
[1] 81
> which(ranks == 81)
[1] 57
> sum(counts[1:57])/sum(counts)
[1] 0.915586
> sum(counts[1:57])/sum(counts) + 81/max(ranks)
[1] 1.003918
Due to the discrete nature of ranks, this will be a 100% perfect solution only in specific cases. The algorithm above finds the fraction tied to the rank that gives you the result closest to the perfect solution.
Upvotes: 2